PaddlePaddle / Paddle-Lite
Commit 5f12addc
Authored May 15, 2020 by dingminghui; committed by jackzhang235 on May 21, 2020.
fix(mir): fix mlu cast and layout error insert
Parent: 56c1f666

Showing 5 changed files with 146 additions and 97 deletions (+146 -97):
  lite/core/mir/mlu_postprocess_pass.cc   +134  -83
  lite/kernels/mlu/bridges/cast_op.cc     +6    -11
  lite/kernels/mlu/bridges/layout_op.cc   +2    -2
  lite/kernels/mlu/io_copy_compute.cc     +1    -0
  lite/kernels/mlu/subgraph_compute.h     +3    -1
lite/core/mir/mlu_postprocess_pass.cc

@@ -134,8 +134,8 @@ Node* MLUPostprocessPass::InsertCastAfter(const std::string& op_type,
   cpp::OpDesc op_desc;
   op_desc.SetType(op_type);
   if (op_type == "cast") {
-    op_desc.SetAttr<int>("in_dtype", 4);   // FP32
-    op_desc.SetAttr<int>("out_dtype", 5);  // FP16
+    op_desc.SetAttr<int>("in_dtype", 4);   // FP16
+    op_desc.SetAttr<int>("out_dtype", 5);  // FP32
     op_desc.SetInput("X", {cast_arg_name});
     op_desc.SetOutput("Out", {cur_node->AsArg().name});
   } else if (op_type == "layout") {
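Note: only the comments change here, to follow the dtype encoding the rest of this commit relies on: attribute value 4 means FP16 and 5 means FP32 (the cast bridge below maps in_dtype == 4 && out_dtype == 5 to CNML_CAST_FLOAT16_TO_FLOAT32). A minimal sketch of that assumption; the enum name is illustrative, not from the source:

// Illustrative only: the integer values carried by the "cast" op's
// "in_dtype"/"out_dtype" attributes, as this commit's comments read them.
#include <cassert>

enum CastDType { kFP16 = 4, kFP32 = 5 };

int main() {
  // InsertCastAfter builds a cast from the MLU's FP16 output back to FP32.
  assert(kFP16 == 4 && kFP32 == 5);
  return 0;
}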
@@ -241,17 +241,27 @@ void MLUPostprocessPass::InsertBefore(SSAGraph* graph,
                                   inst_type->precision(),
                                   inst_type->layout()));
     }
     // io copy
     cur_node = InsertCastBefore(
         "io_copy",
         name_prefix + "io_copy",
         graph,
         cur_node,
         inst_node,
         LiteType::GetTensorTy(
             inst_type->target(), inst_type->precision(), inst_type->layout()));
+  } else {
+    // io copy
+    cur_node = InsertCastBefore(
+        "io_copy",
+        name_prefix + "io_copy",
+        graph,
+        cur_node,
+        inst_node,
+        LiteType::GetTensorTy(
+            inst_type->target(), head_type->precision(), head_type->layout()));
+  }
   // connect cur_node to inst_node
   DirectedLink(cur_node, inst_node);
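Note: this hunk adds an else branch whose guard sits in the collapsed context above; in that path no host-side cast/layout nodes were inserted, so the boundary io_copy is typed with the head tensor's own precision and layout (head_type) instead of the instruction's. InsertAfter below mirrors this with tail_type. A simplified sketch of the two typings; Ty, IoCopyType, and the bool flag are stand-ins, not source names:

#include <cassert>
#include <string>

struct Ty { std::string target, precision, layout; };

// Stand-in for the two LiteType::GetTensorTy calls in InsertBefore.
Ty IoCopyType(bool host_side_casts, const Ty& inst, const Ty& head) {
  if (host_side_casts)
    return {inst.target, inst.precision, inst.layout};  // existing branch
  return {inst.target, head.precision, head.layout};    // new else branch
}

int main() {
  Ty inst{"kMLU", "kFP16", "kNHWC"}, head{"kHost", "kFloat", "kNCHW"};
  Ty t = IoCopyType(false, inst, head);
  assert(t.target == "kMLU" && t.precision == "kFloat" && t.layout == "kNCHW");
  return 0;
}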
@@ -393,17 +403,26 @@ void MLUPostprocessPass::InsertAfter(SSAGraph* graph,
                                   inst_type->precision(),
                                   inst_type->layout()));
     }
     // io copy
     cur_node = InsertCastAfter(
         "io_copy",
         name_prefix + "io_copy",
         graph,
         cur_node,
         inst_node,
         LiteType::GetTensorTy(
             inst_type->target(), inst_type->precision(), inst_type->layout()));
+  } else {
+    cur_node = InsertCastAfter(
+        "io_copy",
+        name_prefix + "io_copy",
+        graph,
+        cur_node,
+        inst_node,
+        LiteType::GetTensorTy(
+            inst_type->target(), tail_type->precision(), tail_type->layout()));
+  }
   // connect cur_node to inst_node
   DirectedLink(inst_node, cur_node);
@@ -504,6 +523,8 @@ void MLUPostprocessPass::GatherAndModifyFirstConvNodes(SSAGraph* graph) {
 void MLUPostprocessPass::ModifyInputOutputDataType(SSAGraph* graph) {
   for (auto& node : graph->mutable_nodes()) {
     if (node.IsStmt() && node.AsStmt().op_type() == "subgraph") {
+      const Type* subgraph_arg_type = nullptr;
+      GetSubgraphOpArgType(&node, &subgraph_arg_type, graph);
       for (auto& in_node : node.inlinks) {
         const auto* in_node_type = in_node->AsArg().type;
         VLOG(4) << "MLU subgraph input type: " << in_node->AsArg().name
@@ -525,6 +546,7 @@ void MLUPostprocessPass::ModifyInputOutputDataType(SSAGraph* graph) {
       }
       for (auto& out_node : node.outlinks) {
         const auto* out_node_type = out_node->AsArg().type;
+        auto& out_arg = out_node->AsArg();
         VLOG(4) << "MLU subgraph output type: " << out_node->AsArg().name
                 << *out_node_type;
         if (out_node->AsArg().is_weight || out_node->AsArg().is_persist) {
@@ -536,14 +558,24 @@ void MLUPostprocessPass::ModifyInputOutputDataType(SSAGraph* graph) {
               TARGET(kMLU), PRECISION(kAny), DATALAYOUT(kNHWC));
         } else if (out_node_type->precision() == PRECISION(kAny) &&
                    out_node->outlinks.empty()) {
-          out_node->AsArg().is_persist = true;
-          out_node->AsArg().type = LiteType::GetTensorTy(
+          out_arg.is_persist = true;
+          out_arg.type = LiteType::GetTensorTy(
               TARGET(kMLU), PRECISION(kAny), DATALAYOUT(kNHWC));
         } else {
           CHECK(out_node_type->precision() == PRECISION(kFloat))
               << "MLU subgraph unexpected common output type!";
-          out_node->AsArg().type = LiteType::GetTensorTy(
-              TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW));
+          if (out_node->outlinks.empty()) {
+            out_arg.type = LiteType::GetTensorTy(
+                TARGET(kHost), subgraph_arg_type->precision(), DATALAYOUT(kNHWC));
+            VLOG(5) << "unused output node type: " << out_arg.name
+                    << out_node_type->name();
+          } else {
+            out_arg.type = LiteType::GetTensorTy(
+                TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW));
+            VLOG(5) << "output node type: " << out_arg.name
+                    << out_node_type->name();
+          }
         }
       }
       const auto target = out_node->AsArg().type->target();
       const auto precision = out_node->AsArg().type->precision();
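Note: the reworked else branch special-cases outputs nobody consumes. With no outlinks, the arg keeps the subgraph's own precision and NHWC layout (via subgraph_arg_type) instead of being retyped to host float/NCHW, which would otherwise demand a pointless cast back to the host. A simplified sketch with stand-in types (OutArg and the string-encoded type are illustrative):

#include <cassert>
#include <string>

struct OutArg {
  std::string type;
  int outlinks = 0;  // number of consumers of this output
};

void SetCommonOutputType(OutArg* out, const std::string& subgraph_precision) {
  if (out->outlinks == 0) {
    out->type = "kHost/" + subgraph_precision + "/kNHWC";  // unused output
  } else {
    out->type = "kHost/kFloat/kNCHW";  // output consumed on the host side
  }
}

int main() {
  OutArg unused;
  SetCommonOutputType(&unused, "kFP16");
  assert(unused.type == "kHost/kFP16/kNHWC");
  return 0;
}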
@@ -610,79 +642,88 @@ void MLUPostprocessPass::ModifyLayout(SSAGraph* graph) {
     }
   }
 }
-std::string CheckInputAndInsert(Scope* scope,
+std::pair<bool, std::string> CheckInputAndInsert(Scope* scope,
                                 cpp::BlockDesc* block_desc,
                                 const std::string& input_name,
                                 const Type* tensor_type,
                                 const Type* subgraph_type) {
   auto cur_node = input_name;
-  if (DataLayoutCompatible(*tensor_type, *subgraph_type)) {
+  bool do_insert = false;
+  if (!DataLayoutCompatible(*tensor_type, *subgraph_type)) {
     auto layout_op = block_desc->AddOp<cpp::OpDesc>();
     auto layout_arg_name = string_format("%s/layout", cur_node.c_str());
     scope->Var(layout_arg_name);
-    VLOG(5) << "insert layout in subgraph, arg tensor name: "
+    VLOG(5) << "insert layout for subgraph input, arg tensor name: "
             << layout_arg_name;
     layout_op->SetType("layout");
     layout_op->SetInput("Input", {cur_node});
     layout_op->SetOutput("Out", {layout_arg_name});
     cur_node = layout_arg_name;
+    do_insert = true;
   }
-  if (PrecisionCompatible(*tensor_type, *subgraph_type) &&
+  if (!PrecisionCompatible(*tensor_type, *subgraph_type) &&
       tensor_type->precision() != PRECISION(kInt8)) {
     auto cast_op = block_desc->AddOp<cpp::OpDesc>();
     auto cast_arg_name = string_format("%s/cast", cur_node.c_str());
     scope->Var(cast_arg_name);
-    VLOG(5) << "insert cast in subgraph, arg tensor name: " << cast_arg_name;
+    VLOG(5) << "insert cast for subgraph input, arg tensor name: " << cast_arg_name;
     cast_op->SetType("cast");
-    cast_op->SetAttr<int>("in_dtype", 4);   // FP32
-    cast_op->SetAttr<int>("out_dtype", 5);  // FP16
+    cast_op->SetAttr<int>("in_dtype", 5);   // FP32
+    cast_op->SetAttr<int>("out_dtype", 4);  // FP16
     cast_op->SetInput("X", {cur_node});
     cast_op->SetOutput("Out", {cast_arg_name});
     cur_node = cast_arg_name;
+    do_insert = true;
   }
-  return cur_node;
+  return std::make_pair(do_insert, cur_node);
 }
-std::string CheckOutputAndInsert(Scope* scope,
+std::pair<bool, std::string> CheckOutputAndInsert(
+    Scope* scope,
                                  cpp::BlockDesc* block_desc,
                                  const std::string& output_name,
                                  const Type* tensor_type,
                                  const Type* subgraph_type) {
   auto cur_node = output_name;
+  bool do_insert = false;
   cpp::OpDesc *layout_op = nullptr, *cast_op = nullptr;
   // subgraph -> cast -> layout -> output
-  if (PrecisionCompatible(*tensor_type, *subgraph_type)) {
+  if (!PrecisionCompatible(*tensor_type, *subgraph_type)) {
     cast_op = block_desc->AddOp<cpp::OpDesc>();
     cast_op->SetType("cast");
-    cast_op->SetAttr<int>("in_dtype", 5);   // FP16
-    cast_op->SetAttr<int>("out_dtype", 4);  // FP32
+    cast_op->SetAttr<int>("in_dtype", 4);   // FP16
+    cast_op->SetAttr<int>("out_dtype", 5);  // FP32
+    do_insert = true;
   }
-  if (DataLayoutCompatible(*tensor_type, *subgraph_type)) {
+  if (!DataLayoutCompatible(*tensor_type, *subgraph_type)) {
    auto layout_arg_name = string_format("%s/layout", cur_node.c_str());
    scope->Var(layout_arg_name);
-    VLOG(5) << "insert layout in subgraph, arg tensor name: "
+    VLOG(5) << "insert layout for subgraph output, arg tensor name: "
            << layout_arg_name;
    layout_op = block_desc->AddOp<cpp::OpDesc>();
    layout_op->SetType("layout");
    layout_op->SetInput("Input", {layout_arg_name});
    layout_op->SetOutput("Out", {cur_node});
    cur_node = layout_arg_name;
+    do_insert = true;
  }
  if (cast_op) {
    auto cast_arg_name = string_format("%s/cast", cur_node.c_str());
    scope->Var(cast_arg_name);
-    VLOG(5) << "insert cast in subgraph, arg tensor name: " << cast_arg_name;
+    VLOG(5) << "insert cast for subgraph output, arg tensor name: " << cast_arg_name;
    cast_op->SetInput("X", {cast_arg_name});
    cast_op->SetOutput("Out", {cur_node});
    cur_node = cast_arg_name;
  }
-  return cur_node;
+  return std::make_pair(do_insert, cur_node);
 }
 // insert cast op on mlu, to avoid cast on cpu, invoke before first run
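Note: both helpers now return std::pair<bool, std::string>: whether anything was inserted, plus the tensor name the subgraph should now read or write. The old code also had the compatibility checks inverted (it inserted a cast/layout when the types were compatible); with the negated conditions and the bool flag, AdjustSubgraph (next hunk) records a replacement only when an op was really added. A minimal sketch of the contract, with simplified stand-in logic:

#include <cassert>
#include <map>
#include <string>
#include <utility>

// Stand-in for CheckInputAndInsert: {did_insert, final tensor name}.
std::pair<bool, std::string> CheckAndInsert(const std::string& name,
                                            bool layout_mismatch,
                                            bool precision_mismatch) {
  std::string cur_node = name;
  bool do_insert = false;
  if (layout_mismatch) {     // mirrors !DataLayoutCompatible(...)
    cur_node += "/layout";
    do_insert = true;
  }
  if (precision_mismatch) {  // mirrors !PrecisionCompatible(...)
    cur_node += "/cast";
    do_insert = true;
  }
  return std::make_pair(do_insert, cur_node);
}

int main() {
  std::map<std::string, std::string> node_replace;
  auto ret = CheckAndInsert("x", true, false);
  if (ret.first) node_replace["x"] = ret.second;  // remap only on insertion
  assert(node_replace.at("x") == "x/layout");
  assert(CheckAndInsert("y", false, false).first == false);  // no entry made
  return 0;
}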
@@ -711,22 +752,28 @@ void MLUPostprocessPass::AdjustSubgraph(Node* subgraph_node,
     auto input_name = input->AsArg().name;
     if (!(input->AsArg().is_weight || input->AsArg().is_persist)) {
       i_names.emplace_back(input_name);
-      node_replace[input_name] = CheckInputAndInsert(op->scope(),
+      auto ret = CheckInputAndInsert(op->scope(),
                                      new_block_desc,
                                      input_name,
                                      input->AsArg().type,
                                      subgraph_type);
+      if (ret.first) {
+        node_replace[input_name] = ret.second;
+      }
     }
   }
   for (auto& output : subgraph_node->outlinks) {
     auto output_name = output->AsArg().name;
     if (!(output->AsArg().is_weight || output->AsArg().is_persist)) {
       o_names.emplace_back(output_name);
-      node_replace[output_name] = CheckOutputAndInsert(op->scope(),
+      auto ret = CheckOutputAndInsert(op->scope(),
                                       block_desc,
                                       output_name,
                                       output->AsArg().type,
                                       subgraph_type);
+      if (ret.first) {
+        node_replace[output_name] = ret.second;
+      }
     }
   }
@@ -736,34 +783,38 @@ void MLUPostprocessPass::AdjustSubgraph(Node* subgraph_node,
       auto new_desc = new_block_desc->AddOp<cpp::OpDesc>();
       *new_desc = *desc;
+      if (desc->Type() != "layout" && desc->Type() != "cast") {
         auto op_input_args = new_desc->InputArgumentNames();
         for (auto& input_arg : op_input_args) {
           auto op_input = new_desc->Input(input_arg);
           for (auto& it : i_names) {
             auto index = std::find(op_input.cbegin(), op_input.cend(), it);
-            if (index != op_input.cend()) {
+            if (index != op_input.cend() &&
+                node_replace.find(it) != node_replace.end()) {
               index = op_input.erase(index);
               op_input.emplace(index, node_replace.at(it));
               VLOG(4) << new_desc->Type() << "] change input from " << it << " to "
                       << node_replace.at(it);
             }
           }
           new_desc->SetInput(input_arg, op_input);
         }
         auto op_output_args = new_desc->OutputArgumentNames();
         for (auto& output_arg : op_output_args) {
           auto op_output = new_desc->Output(output_arg);
           for (auto& it : o_names) {
             auto index = std::find(op_output.cbegin(), op_output.cend(), it);
-            if (index != op_output.cend()) {
+            if (index != op_output.cend() &&
+                node_replace.find(it) != node_replace.end()) {
               index = op_output.erase(index);
               op_output.emplace(index, node_replace.at(it));
               VLOG(4) << new_desc->Type() << "] change output from " << it << " to "
                       << node_replace.at(it);
             }
           }
           new_desc->SetOutput(output_arg, op_output);
         }
+      }
     op->SetSubBlock(new_block_desc);
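Note: besides skipping the freshly inserted layout/cast ops themselves, the rewrite now checks node_replace.find(it) before calling node_replace.at(it); since the helpers no longer record names for untouched tensors, an unconditional at() would throw std::out_of_range. A small self-contained illustration of the guard:

#include <cassert>
#include <map>
#include <string>
#include <vector>

int main() {
  std::map<std::string, std::string> node_replace{{"x", "x/layout"}};
  std::vector<std::string> op_input{"x", "y"};  // "y" needed no insertion
  for (auto& name : op_input) {
    auto it = node_replace.find(name);
    if (it != node_replace.end()) name = it->second;  // guard before rename
  }
  assert(op_input[0] == "x/layout" && op_input[1] == "y");
  return 0;
}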
lite/kernels/mlu/bridges/cast_op.cc

@@ -40,26 +40,21 @@ int CastConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK(graph->HasNode(x_var_name));
   auto x_tensor = graph->GetNode(x_var_name);
-  cnmlDataType_t data_type;
-  if (out_dtype == 4) {
-    data_type = CNML_DATA_FLOAT16;
-  } else if (out_dtype == 5) {
-    data_type = CNML_DATA_FLOAT32;
-  } else {
-    CHECK(0) << "Unsupported data_type";
-  }
-  auto output_tensor = graph->AddNode(
-      out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, data_type);
+  cnmlDataType_t out_type;
   cnmlCastType_t cast_type;
   if (in_dtype == 4 && out_dtype == 5) {
     cast_type = CNML_CAST_FLOAT16_TO_FLOAT32;
+    out_type = CNML_DATA_FLOAT32;
   } else if (in_dtype == 5 && out_dtype == 4) {
     cast_type = CNML_CAST_FLOAT32_TO_FLOAT16;
+    out_type = CNML_DATA_FLOAT16;
   } else {
     CHECK(0) << "Unsupported cast type";
   }
+  auto output_tensor = graph->AddNode(
+      out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, out_type);
   cnmlBaseOp_t cast_op;
   CNML_CALL(cnmlCreateCastOp(&cast_op,
                              cast_type,
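Note: the output tensor's dtype is now chosen in the same branch that picks the cast kind, so the two can no longer disagree (previously data_type was derived from out_dtype alone, before the (in_dtype, out_dtype) pair was validated). A sketch of the shape of the fix; the enum names are illustrative stand-ins for the CNML constants:

#include <cassert>
#include <utility>

enum DType { FP16, FP32 };
enum CastKind { FP16_TO_FP32, FP32_TO_FP16 };

// One decision yields both values, as in the patched CastConverter.
std::pair<CastKind, DType> PickCast(int in_dtype, int out_dtype) {
  if (in_dtype == 4 && out_dtype == 5) return {FP16_TO_FP32, FP32};
  if (in_dtype == 5 && out_dtype == 4) return {FP32_TO_FP16, FP16};
  assert(false && "Unsupported cast type");
  return {FP16_TO_FP32, FP32};  // unreachable
}

int main() {
  auto r = PickCast(4, 5);
  assert(r.first == FP16_TO_FP32 && r.second == FP32);  // dtype matches cast
  return 0;
}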
lite/kernels/mlu/bridges/layout_op.cc

@@ -60,7 +60,7 @@ int LayoutConverter(void* ctx, OpLite* op, KernelBase* kernel) {
         CHECK(0) << "Unsupport shape";
     }
     output_tensor = graph->AddNode(
-        out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, graph->FPType());
+        out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, x_tensor->dtype());
     VLOG(3) << "layout transpose nchw to nhwc" << std::endl;
   } else {
     switch (x_dims.size()) {

@@ -84,7 +84,7 @@ int LayoutConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                    output_dims,
                                    CNML_TENSOR,
                                    CNML_NCHW,
-                                   graph->FPType(),
+                                   x_tensor->dtype(),
                                    CNML_NCHW);
   }
   cnmlBaseOp_t layout_op;
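Note: the transpose output now inherits the input tensor's dtype (x_tensor->dtype()) rather than the graph-wide FPType(), so a layout op sitting inside an FP16 chain no longer silently retypes its output to the default precision. A stand-in sketch (MLUTensor and AddLayoutOutput are illustrative names):

#include <cassert>
#include <string>

struct MLUTensor { std::string dtype; };

// Stand-in for the patched AddNode call in LayoutConverter.
MLUTensor AddLayoutOutput(const MLUTensor& x, const std::string& graph_fp_type) {
  (void)graph_fp_type;        // no longer consulted for the output dtype
  return MLUTensor{x.dtype};  // follow the input tensor instead
}

int main() {
  assert(AddLayoutOutput(MLUTensor{"float16"}, "float32").dtype == "float16");
  return 0;
}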
lite/kernels/mlu/io_copy_compute.cc

@@ -41,6 +41,7 @@ class IoCopyHostToMluCompute
     auto mem_size = param.x->memory_size();
     // LOG(INFO) << "copy size " << mem_size;
     auto* data = param.y->mutable_data(TARGET(kMLU), mem_size);
+    param.y->set_precision(param.x->precision());
     CopyFromHostSync(data, param.x->raw_data(), mem_size);
   }
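Note: mutable_data(TARGET(kMLU), mem_size) only allocates raw device memory; it does not carry over the tensor's precision, so the copy kernel now propagates it explicitly from the source tensor. A simplified stand-in (the Tensor struct here is illustrative, not the Lite API):

#include <cassert>
#include <cstddef>
#include <cstring>
#include <string>
#include <vector>

struct Tensor {
  std::vector<unsigned char> buf;
  std::string precision = "unknown";
  void* mutable_data(std::size_t bytes) { buf.resize(bytes); return buf.data(); }
};

void IoCopyHostToMlu(const Tensor& x, Tensor* y) {
  void* data = y->mutable_data(x.buf.size());
  y->precision = x.precision;  // the added line: keep dtype metadata in sync
  std::memcpy(data, x.buf.data(), x.buf.size());
}

int main() {
  Tensor x, y;
  x.buf = {1, 2, 3, 4};
  x.precision = "kFP16";
  IoCopyHostToMlu(x, &y);
  assert(y.precision == "kFP16" && y.buf.size() == 4);
  return 0;
}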
lite/kernels/mlu/subgraph_compute.h

@@ -147,7 +147,7 @@ class SubgraphEngine : public subgraph::Engine {
     origin_itensors_.clear();
     origin_otensors_.clear();
-    auto data_order = block_desc_->GetOp<cpp::OpDesc>(0)->Type() == "cast"
+    auto data_order = block_desc_->GetOp<cpp::OpDesc>(0)->Type() == "layout"
                           ? CNML_NCHW
                           : CNML_NHWC;
     // Convert all of input data vars and added into the MLU IR graph

@@ -166,6 +166,7 @@ class SubgraphEngine : public subgraph::Engine {
       }
       CHECK(input_tensor);
+      VLOG(4) << "subgraph input tensor " << input_name << std::endl;
       auto input_node = graph->AddNode(input_name,
                                        input_tensor->dims().Vectorize(),
                                        CNML_TENSOR,

@@ -217,6 +218,7 @@ class SubgraphEngine : public subgraph::Engine {
       graph->AddOutput(graph->GetNode(output_name));
       auto output_tensor = scope_->FindMutableTensor(output_name);
       origin_otensors_.push_back(output_tensor);
+      VLOG(4) << "subgraph output tensor " << output_name << std::endl;
       // auto node = graph->GetNode(output_name);
       // CHECK(p_data);
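Note: after AdjustSubgraph's rewrite, a subgraph whose host input needs transposition now begins with a "layout" op rather than a "cast" op, so the first op's type becomes the signal for whether input data arrives as NCHW. A small sketch of the heuristic as this diff leaves it; the enum and function names are illustrative:

#include <cassert>
#include <string>

enum DataOrder { NCHW, NHWC };

// Mirrors: GetOp<cpp::OpDesc>(0)->Type() == "layout" ? CNML_NCHW : CNML_NHWC
DataOrder InferDataOrder(const std::string& first_op_type) {
  return first_op_type == "layout" ? NCHW : NHWC;
}

int main() {
  assert(InferDataOrder("layout") == NCHW);  // transpose inserted, host NCHW
  assert(InferDataOrder("conv2d") == NHWC);  // already NHWC, fed directly
  return 0;
}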