Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
ca973139
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
ca973139
编写于
9月 08, 2018
作者:
T
tensor-tang
提交者:
GitHub
9月 08, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #13285 from tensor-tang/refine/ut/lac
add analysis unit test of lac and ner
上级
f90c7865
39c49212
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
486 addition
and
64 deletion
+486
-64
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+2
-0
paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
+203
-0
paddle/fluid/framework/ir/fc_gru_fuse_pass.h
paddle/fluid/framework/ir/fc_gru_fuse_pass.h
+50
-0
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
+6
-5
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+51
-31
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+2
-0
paddle/fluid/inference/analysis/CMakeLists.txt
paddle/fluid/inference/analysis/CMakeLists.txt
+2
-2
paddle/fluid/inference/analysis/analyzer.h
paddle/fluid/inference/analysis/analyzer.h
+2
-1
paddle/fluid/inference/analysis/analyzer_lac_tester.cc
paddle/fluid/inference/analysis/analyzer_lac_tester.cc
+90
-10
paddle/fluid/inference/analysis/analyzer_ner_tester.cc
paddle/fluid/inference/analysis/analyzer_ner_tester.cc
+63
-10
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+0
-2
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+0
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+13
-0
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+2
-2
未找到文件。
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
ca973139
...
...
@@ -31,7 +31,9 @@ pass_library(fc_fuse_pass inference)
pass_library
(
attention_lstm_fuse_pass inference
)
pass_library
(
infer_clean_graph_pass inference
)
pass_library
(
fc_lstm_fuse_pass inference
)
pass_library
(
fc_gru_fuse_pass inference
)
pass_library
(
seq_concat_fc_fuse_pass inference
)
set
(
GLOB_PASS_LIB
${
PASS_LIBRARY
}
CACHE INTERNAL
"Global PASS library"
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
...
...
paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
0 → 100644
浏览文件 @
ca973139
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/fc_gru_fuse_pass.h"
#include <string>
#include "paddle/fluid/framework/lod_tensor.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
static
void
BuildPattern
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
bool
with_fc_bias
)
{
PDNode
*
x
=
pattern
->
NewNode
(
name_scope
,
"x"
)
->
assert_is_op_input
(
"mul"
)
->
assert_var_not_persistable
();
auto
*
fc_out
=
patterns
::
FC
(
pattern
,
name_scope
,
x
,
with_fc_bias
);
fc_out
->
AsIntermediate
();
// fc_out is a tmp var, will be removed after fuse.
patterns
::
GRU
(
pattern
,
name_scope
,
fc_out
);
VLOG
(
3
)
<<
"fc_gru pattern
\n
"
<<
pattern
->
DotString
();
}
static
int
BuildFusion
(
Graph
*
graph
,
const
std
::
string
&
name_scope
,
Scope
*
scope
,
bool
with_fc_bias
)
{
GraphPatternDetector
gpd
;
auto
*
pattern
=
gpd
.
mutable_pattern
();
BuildPattern
(
pattern
,
name_scope
,
with_fc_bias
);
// Create New OpDesc
auto
gru_creater
=
[
&
](
int
gru
,
int
x
,
int
weight_x
,
int
weight_h
,
int
bias
,
int
hidden
,
int
fc_bias
)
{
#define GET_NODE(x) auto* x##_n = graph->RetriveNode(x);
GET_NODE
(
x
);
GET_NODE
(
weight_x
);
GET_NODE
(
weight_h
);
GET_NODE
(
bias
);
GET_NODE
(
hidden
);
GET_NODE
(
gru
);
OpDesc
op_desc
;
op_desc
.
SetType
(
"fusion_gru"
);
#define NEW_NAME(x) name_scope + "/at." #x ".new"
#define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__##_n->Name()});
SET_IN
(
X
,
x
);
SET_IN
(
WeightX
,
weight_x
);
SET_IN
(
WeightH
,
weight_h
);
if
(
with_fc_bias
)
{
op_desc
.
SetInput
(
"Bias"
,
{
NEW_NAME
(
bias
)
+
bias_n
->
Name
()});
}
else
{
SET_IN
(
Bias
,
bias
);
}
#undef SET_IN
op_desc
.
SetInput
(
"H0"
,
{});
op_desc
.
SetOutput
(
"Hidden"
,
{
hidden_n
->
Name
()});
op_desc
.
SetAttr
(
"is_reverse"
,
gru_n
->
Op
()
->
GetAttr
(
"is_reverse"
));
// TODO(TJ): This should be a option for infer
op_desc
.
SetAttr
(
"use_seq"
,
true
);
#define SET_IMTERMEDIATE_OUT(key) op_desc.SetOutput(#key, {NEW_NAME(key)})
SET_IMTERMEDIATE_OUT
(
ReorderedH0
);
SET_IMTERMEDIATE_OUT
(
XX
);
SET_IMTERMEDIATE_OUT
(
BatchedInput
);
SET_IMTERMEDIATE_OUT
(
BatchedOut
);
#undef SET_IMTERMEDIATE_OUT
auto
*
op
=
graph
->
CreateOpNode
(
&
op_desc
);
PADDLE_ENFORCE
(
graph
->
Has
(
kParamScopeAttr
));
auto
*
scope
=
graph
->
Get
<
Scope
*>
(
kParamScopeAttr
);
PADDLE_ENFORCE
(
scope
);
if
(
with_fc_bias
)
{
// Fusion GRU bias = fcbias + grubias
auto
*
fusion_bias_var
=
scope
->
Var
(
NEW_NAME
(
bias
)
+
bias_n
->
Name
());
auto
*
out_bias_tensor
=
fusion_bias_var
->
GetMutable
<
framework
::
LoDTensor
>
();
PADDLE_ENFORCE
(
fusion_bias_var
);
GET_NODE
(
fc_bias
);
PADDLE_ENFORCE
(
fc_bias_n
);
auto
*
gru_bias_var
=
scope
->
FindVar
(
bias_n
->
Name
());
auto
*
fc_bias_var
=
scope
->
FindVar
(
fc_bias_n
->
Name
());
PADDLE_ENFORCE
(
gru_bias_var
);
PADDLE_ENFORCE
(
fc_bias_var
);
const
auto
&
gru_bias_tenosr
=
gru_bias_var
->
Get
<
framework
::
LoDTensor
>
();
const
auto
&
fc_bias_tensor
=
fc_bias_var
->
Get
<
framework
::
LoDTensor
>
();
// new bias = fc bias + gru bias
out_bias_tensor
->
Resize
(
gru_bias_tenosr
.
dims
());
auto
*
data
=
out_bias_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
out_bias_tensor
->
numel
();
i
++
)
{
data
[
i
]
=
fc_bias_tensor
.
data
<
float
>
()[
i
]
+
gru_bias_tenosr
.
data
<
float
>
()[
i
];
}
}
#undef GET_NODE
#define NEW_IMTERMEDIATE_OUT(key) \
scope->Var(NEW_NAME(key))->GetMutable<framework::LoDTensor>()
NEW_IMTERMEDIATE_OUT
(
ReorderedH0
);
NEW_IMTERMEDIATE_OUT
(
XX
);
NEW_IMTERMEDIATE_OUT
(
BatchedInput
);
NEW_IMTERMEDIATE_OUT
(
BatchedOut
);
#undef NEW_NAME
#undef NEW_IMTERMEDIATE_OUT
IR_NODE_LINK_TO
(
x_n
,
op
);
IR_NODE_LINK_TO
(
weight_x_n
,
op
);
IR_NODE_LINK_TO
(
weight_h_n
,
op
);
IR_NODE_LINK_TO
(
bias_n
,
op
);
// actually should link to new bias if have
IR_NODE_LINK_TO
(
op
,
hidden_n
);
// h0?
return
op
;
};
int
fusion_count
{
0
};
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
g
)
{
#define GET_NODE(name__) \
std::string name__##key = name_scope + "/" + #name__; \
auto* name__##n = pattern->RetrieveNode(name__##key); \
PADDLE_ENFORCE(name__##n); \
PADDLE_ENFORCE(subgraph.count(name__##n)); \
Node* name__##_n = subgraph.at(name__##n); \
int name__ __attribute__((unused)) = name__##_n->id();
GET_NODE
(
x
);
GET_NODE
(
w
);
// fc weight
GET_NODE
(
mul
);
GET_NODE
(
fc_out
);
GET_NODE
(
Weight
);
GET_NODE
(
gru
);
GET_NODE
(
Bias
);
GET_NODE
(
Hidden
);
// nodes need be removed
GET_NODE
(
BatchGate
);
GET_NODE
(
BatchResetHiddenPrev
);
GET_NODE
(
BatchHidden
);
if
(
with_fc_bias
)
{
GET_NODE
(
mul_out
);
GET_NODE
(
fc_bias
);
GET_NODE
(
elementwise_add
);
gru_creater
(
gru
,
x
,
w
,
Weight
,
Bias
,
Hidden
,
fc_bias
);
// Remove unneeded nodes.
std
::
unordered_set
<
const
Node
*>
marked_nodes
(
{
mul_n
,
gru_n
,
elementwise_add_n
,
fc_bias_n
,
fc_out_n
,
mul_out_n
,
BatchGate_n
,
BatchResetHiddenPrev_n
,
BatchHidden_n
});
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
}
else
{
gru_creater
(
gru
,
x
,
w
,
Weight
,
Bias
,
Hidden
,
-
1
);
// Remove unneeded nodes.
std
::
unordered_set
<
const
Node
*>
marked_nodes
(
{
mul_n
,
gru_n
,
BatchGate_n
,
BatchResetHiddenPrev_n
,
BatchHidden_n
});
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
}
#undef GET_NODE
++
fusion_count
;
};
gpd
(
graph
,
handler
);
return
fusion_count
;
}
std
::
unique_ptr
<
ir
::
Graph
>
MulGRUFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
FusePassBase
::
Init
(
name_scope_
,
graph
.
get
());
int
fusion_count
=
BuildFusion
(
graph
.
get
(),
name_scope_
,
param_scope
(),
false
/*with_fc_bias*/
);
AddStatis
(
fusion_count
);
return
graph
;
}
std
::
unique_ptr
<
ir
::
Graph
>
FCGRUFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
FusePassBase
::
Init
(
name_scope_
,
graph
.
get
());
int
fusion_count
=
BuildFusion
(
graph
.
get
(),
name_scope_
,
param_scope
(),
true
/*with_fc_bias*/
);
AddStatis
(
fusion_count
);
return
graph
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
mul_gru_fuse_pass
,
paddle
::
framework
::
ir
::
MulGRUFusePass
);
REGISTER_PASS
(
fc_gru_fuse_pass
,
paddle
::
framework
::
ir
::
FCGRUFusePass
);
paddle/fluid/framework/ir/fc_gru_fuse_pass.h
0 → 100644
浏览文件 @
ca973139
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// The MulGRUFusePass and MulGRUFusePass will fuse to the same FusionGRU op.
class
FCGRUFusePass
:
public
FusePassBase
{
public:
virtual
~
FCGRUFusePass
()
{}
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
;
const
std
::
string
name_scope_
{
"fc_gru_fuse"
};
};
// Just FC without bias
class
MulGRUFusePass
:
public
FusePassBase
{
public:
virtual
~
MulGRUFusePass
()
{}
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
;
const
std
::
string
name_scope_
{
"fc_nobias_gru_fuse"
};
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
浏览文件 @
ca973139
...
...
@@ -20,12 +20,13 @@ namespace paddle {
namespace
framework
{
namespace
ir
{
std
::
string
GenNodeName
(
const
std
::
string
&
prefix
,
const
std
::
string
&
name
)
{
static
std
::
string
GenNodeName
(
const
std
::
string
&
prefix
,
const
std
::
string
&
name
)
{
return
prefix
+
"/"
+
name
;
}
void
BuildPattern
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
bool
with_fc_bias
)
{
static
void
BuildPattern
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
bool
with_fc_bias
)
{
PDNode
*
x
=
pattern
->
NewNode
(
name_scope
,
"x"
)
->
assert_is_op_input
(
"mul"
)
->
assert_var_not_persistable
();
...
...
@@ -35,8 +36,8 @@ void BuildPattern(PDPattern* pattern, const std::string& name_scope,
// LOG(INFO) << "\n" << pattern->DotString();
}
int
BuildFusion
(
Graph
*
graph
,
const
std
::
string
&
name_scope
,
Scope
*
scope
,
bool
with_fc_bias
)
{
static
int
BuildFusion
(
Graph
*
graph
,
const
std
::
string
&
name_
scope
,
Scope
*
scope
,
bool
with_fc_bias
)
{
GraphPatternDetector
gpd
;
auto
*
pattern
=
gpd
.
mutable_pattern
();
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
ca973139
...
...
@@ -519,76 +519,96 @@ bool VarLinksFromOp(Node* node, const std::string& op_type) {
PDNode
*
patterns
::
FC
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
PDNode
*
x
,
bool
with_bias
)
{
// Create Operators
PDNode
*
elementwise_add_op
{
nullptr
};
// mul op
auto
*
mul_op
=
pattern
->
NewNode
(
name_scope
,
"mul"
)
->
assert_is_op
(
"mul"
);
if
(
with_bias
)
{
elementwise_add_op
=
pattern
->
NewNode
(
name_scope
,
"elementwise_add"
)
->
assert_is_op
(
"elementwise_add"
);
}
// Create variables
// w
auto
*
mul_weight_var
=
pattern
->
NewNode
(
name_scope
,
"w"
)
->
AsInput
()
->
assert_is_persistable_var
()
->
assert_is_op_nth_input
(
"mul"
,
"Y"
,
0
);
PDNode
*
mul_out_var
{
nullptr
};
->
assert_is_op_input
(
"mul"
,
"Y"
);
PDNode
*
fc_out
{
nullptr
};
if
(
with_bias
)
{
PDNode
*
elementwise_add_op
{
nullptr
};
PDNode
*
mul_out_var
{
nullptr
},
*
bias
{
nullptr
};
elementwise_add_op
=
pattern
->
NewNode
(
name_scope
,
"elementwise_add"
)
->
assert_is_op
(
"elementwise_add"
);
// intermediate variable, will be removed in the IR after fuse.
mul_out_var
=
pattern
->
NewNode
(
name_scope
,
"mul_out"
)
->
AsIntermediate
()
->
assert_is_only_output_of_op
(
"mul"
)
->
assert_is_op_input
(
"elementwise_add"
);
}
PDNode
*
bias
{
nullptr
},
*
fc_out
{
nullptr
};
if
(
with_bias
)
{
// bias
bias
=
pattern
->
NewNode
(
name_scope
,
"fc_bias"
)
->
assert_is_op_input
(
"elementwise_add"
)
->
AsInput
(
);
->
AsInput
(
)
->
assert_is_op_input
(
"elementwise_add"
);
// output
fc_out
=
pattern
->
NewNode
(
name_scope
,
"fc_out"
)
->
AsOutput
()
->
assert_is_op_output
(
"elementwise_add"
);
mul_op
->
LinksFrom
({
x
,
mul_weight_var
}).
LinksTo
({
mul_out_var
});
elementwise_add_op
->
LinksFrom
({
mul_out_var
,
bias
}).
LinksTo
({
fc_out
});
}
else
{
fc_out
=
pattern
->
NewNode
(
name_scope
,
"fc_out"
)
->
AsOutput
()
->
assert_is_op_output
(
"mul"
);
}
if
(
with_bias
)
{
mul_op
->
LinksFrom
({
mul_weight_var
,
x
}).
LinksTo
({
mul_out_var
});
elementwise_add_op
->
LinksFrom
({
mul_out_var
,
bias
}).
LinksTo
({
fc_out
});
}
else
{
mul_op
->
LinksFrom
({
mul_weight_var
,
x
}).
LinksTo
({
fc_out
});
}
return
fc_out
;
}
#define NEW_NODE(op__, arg__, io__) \
auto* arg__ = pattern->NewNode(name_scope, #arg__) \
->assert_is_op_##io__(#op__, #arg__);
PDNode
*
patterns
::
LSTM
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
PDNode
*
x
)
{
x
->
assert_is_op_input
(
"lstm"
,
"Input"
);
auto
*
lstm_op
=
pattern
->
NewNode
(
name_scope
,
"lstm"
)
->
assert_is_op
(
"lstm"
);
#define NEW_NODE(arg__, io__) \
auto* arg__ = pattern->NewNode(name_scope, #arg__) \
->assert_is_op_##io__("lstm", #arg__);
// Currently, the H0 and C0 are optional
// TODO(Superjomn) upgrade the fuse framework to support optional.
// NEW_NODE(H0, input);
// NEW_NODE(C0, input);
NEW_NODE
(
Weight
,
input
);
NEW_NODE
(
Bias
,
input
);
NEW_NODE
(
lstm
,
Weight
,
input
);
NEW_NODE
(
lstm
,
Bias
,
input
);
NEW_NODE
(
Hidden
,
output
);
NEW_NODE
(
Cell
,
output
);
NEW_NODE
(
BatchGate
,
output
);
NEW_NODE
(
BatchCellPreAct
,
output
);
NEW_NODE
(
lstm
,
Hidden
,
output
);
NEW_NODE
(
lstm
,
Cell
,
output
);
NEW_NODE
(
lstm
,
BatchGate
,
output
);
NEW_NODE
(
lstm
,
BatchCellPreAct
,
output
);
lstm_op
->
LinksFrom
({
x
,
Weight
,
Bias
});
lstm_op
->
LinksTo
({
Hidden
,
Cell
,
BatchGate
,
BatchCellPreAct
});
return
Hidden
;
}
PDNode
*
patterns
::
GRU
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
PDNode
*
x
)
{
x
->
assert_is_op_input
(
"gru"
,
"Input"
);
auto
*
gru_op
=
pattern
->
NewNode
(
name_scope
,
"gru"
)
->
assert_is_op
(
"gru"
);
NEW_NODE
(
gru
,
Weight
,
input
);
// TODO(Superjomn): upgrade the fuse framework to support optional.
// H0 and bias are optional
NEW_NODE
(
gru
,
Bias
,
input
);
// also optional
// NEW_NODE(H0, input);
NEW_NODE
(
gru
,
Hidden
,
output
);
// below are intermediate
NEW_NODE
(
gru
,
BatchGate
,
output
);
NEW_NODE
(
gru
,
BatchResetHiddenPrev
,
output
);
NEW_NODE
(
gru
,
BatchHidden
,
output
);
BatchGate
->
AsIntermediate
();
BatchResetHiddenPrev
->
AsIntermediate
();
BatchHidden
->
AsIntermediate
();
gru_op
->
LinksFrom
({
x
,
Weight
,
Bias
});
gru_op
->
LinksTo
({
Hidden
,
BatchGate
,
BatchResetHiddenPrev
,
BatchHidden
});
return
Hidden
;
}
#undef NEW_NODE
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
ca973139
...
...
@@ -298,6 +298,8 @@ PDNode* FC(PDPattern* pattern, const std::string& name_scope, PDNode* x,
PDNode
*
LSTM
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
PDNode
*
x
);
PDNode
*
GRU
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
PDNode
*
x
);
}
// namespace patterns
#define IR_NODE_LINK_TO(a, b) \
...
...
paddle/fluid/inference/analysis/CMakeLists.txt
浏览文件 @
ca973139
...
...
@@ -81,7 +81,7 @@ if (NOT EXISTS ${CHINESE_NER_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE)
endif
()
inference_analysis_test
(
test_analyzer_ner SRCS analyzer_ner_tester.cc
EXTRA_DEPS paddle_inference_api paddle_fluid_api
EXTRA_DEPS paddle_inference_api paddle_fluid_api
analysis_predictor
ARGS --infer_model=
${
CHINESE_NER_INSTALL_DIR
}
/model
--infer_data=
${
CHINESE_NER_INSTALL_DIR
}
/data.txt
)
...
...
@@ -94,7 +94,7 @@ if (NOT EXISTS ${LAC_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE)
endif
()
inference_analysis_test
(
test_analyzer_lac SRCS analyzer_lac_tester.cc
EXTRA_DEPS paddle_inference_api paddle_fluid_api
EXTRA_DEPS paddle_inference_api paddle_fluid_api
ir_pass_manager analysis_predictor
ARGS --infer_model=
${
LAC_INSTALL_DIR
}
/model
--infer_data=
${
LAC_INSTALL_DIR
}
/data.txt
)
...
...
paddle/fluid/inference/analysis/analyzer.h
浏览文件 @
ca973139
...
...
@@ -38,7 +38,6 @@ limitations under the License. */
#include <gflags/gflags.h>
#include <string>
#include <vector>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/flags.h"
#include "paddle/fluid/inference/analysis/pass_manager.h"
...
...
@@ -69,6 +68,8 @@ class Analyzer : public OrderedRegistry<PassManager> {
"attention_lstm_fuse_pass"
,
//
"fc_lstm_fuse_pass"
,
//
"mul_lstm_fuse_pass"
,
//
"fc_gru_fuse_pass"
,
//
"mul_gru_fuse_pass"
,
//
"seq_concat_fc_fuse_pass"
,
//
"fc_fuse_pass"
,
//
}};
...
...
paddle/fluid/inference/analysis/analyzer_lac_tester.cc
浏览文件 @
ca973139
...
...
@@ -11,13 +11,14 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/
pass
.h"
#include "paddle/fluid/framework/ir/
fuse_pass_base
.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_
api
.h"
#include "paddle/fluid/inference/api/paddle_inference_
pass
.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_string
(
infer_model
,
""
,
"model path for LAC"
);
...
...
@@ -102,6 +103,7 @@ struct DataRecord {
return
data
;
}
};
void
GetOneBatch
(
std
::
vector
<
PaddleTensor
>
*
input_slots
,
DataRecord
*
data
,
int
batch_size
)
{
auto
one_batch
=
data
->
NextBatch
();
...
...
@@ -114,6 +116,7 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
PADDLE_ENFORCE_EQ
(
batch_size
,
static_cast
<
int
>
(
one_batch
.
lod
.
size
()
-
1
));
input_slots
->
assign
({
input_tensor
});
}
void
BenchAllData
(
const
std
::
string
&
model_path
,
const
std
::
string
&
data_file
,
const
int
batch_size
,
const
int
repeat
)
{
NativeConfig
config
;
...
...
@@ -141,17 +144,16 @@ void BenchAllData(const std::string &model_path, const std::string &data_file,
}
PrintTime
(
batch_size
,
repeat
,
1
,
0
,
sum
/
repeat
);
}
const
int64_t
lac_ref_data
[]
=
{
24
,
25
,
25
,
25
,
38
,
30
,
31
,
14
,
15
,
44
,
24
,
25
,
25
,
25
,
25
,
25
,
44
,
24
,
25
,
25
,
25
,
36
,
42
,
43
,
44
,
14
,
15
,
44
,
14
,
15
,
44
,
14
,
15
,
44
,
38
,
39
,
14
,
15
,
44
,
22
,
23
,
23
,
23
,
23
,
23
,
23
,
23
};
void
TestLACPrediction
(
const
std
::
string
&
model_path
,
const
std
::
string
&
data_file
,
const
int
batch_size
,
const
int
repeat
,
bool
test_all_data
)
{
if
(
test_all_data
)
{
BenchAllData
(
model_path
,
data_file
,
batch_size
,
repeat
);
return
;
}
const
int
repeat
,
bool
test_all_data
,
bool
use_analysis
=
false
)
{
NativeConfig
config
;
config
.
model_dir
=
model_path
;
config
.
use_gpu
=
false
;
...
...
@@ -160,17 +162,47 @@ void TestLACPrediction(const std::string &model_path,
std
::
vector
<
PaddleTensor
>
input_slots
,
outputs_slots
;
DataRecord
data
(
data_file
,
batch_size
);
GetOneBatch
(
&
input_slots
,
&
data
,
batch_size
);
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
if
(
use_analysis
)
{
AnalysisConfig
cfg
;
cfg
.
model_dir
=
model_path
;
cfg
.
use_gpu
=
false
;
cfg
.
device
=
0
;
cfg
.
specify_input_name
=
true
;
cfg
.
enable_ir_optim
=
true
;
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
cfg
);
}
else
{
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
}
for
(
int
i
=
0
;
i
<
FLAGS_burning
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
}
Timer
timer
;
if
(
test_all_data
)
{
double
sum
=
0
;
LOG
(
INFO
)
<<
"Total number of samples: "
<<
data
.
datasets
.
size
();
for
(
int
i
=
0
;
i
<
repeat
;
i
++
)
{
for
(
size_t
bid
=
0
;
bid
<
data
.
batched_datas
.
size
();
++
bid
)
{
GetOneBatch
(
&
input_slots
,
&
data
,
batch_size
);
timer
.
tic
();
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
sum
+=
timer
.
toc
();
}
}
PrintTime
(
batch_size
,
repeat
,
1
,
0
,
sum
/
repeat
);
LOG
(
INFO
)
<<
"Average latency of each sample: "
<<
sum
/
repeat
/
data
.
datasets
.
size
()
<<
" ms"
;
return
;
}
timer
.
tic
();
for
(
int
i
=
0
;
i
<
repeat
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
}
PrintTime
(
batch_size
,
repeat
,
1
,
0
,
timer
.
toc
()
/
repeat
);
// check result
EXPECT_EQ
(
outputs_slots
.
size
(),
1UL
);
auto
&
out
=
outputs_slots
[
0
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
...
...
@@ -182,12 +214,60 @@ void TestLACPrediction(const std::string &model_path,
for
(
size_t
i
=
0
;
i
<
batch1_size
;
++
i
)
{
EXPECT_EQ
(
pdata
[
i
],
lac_ref_data
[
i
]);
}
if
(
use_analysis
)
{
// run once for comparion as reference
auto
ref_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
std
::
vector
<
PaddleTensor
>
ref_outputs_slots
;
ref_predictor
->
Run
(
input_slots
,
&
ref_outputs_slots
);
EXPECT_EQ
(
ref_outputs_slots
.
size
(),
outputs_slots
.
size
());
auto
&
ref_out
=
ref_outputs_slots
[
0
];
size_t
ref_size
=
std
::
accumulate
(
ref_out
.
shape
.
begin
(),
ref_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
EXPECT_EQ
(
size
,
ref_size
);
int64_t
*
pdata_ref
=
static_cast
<
int64_t
*>
(
ref_out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
EXPECT_EQ
(
pdata_ref
[
i
],
pdata
[
i
]);
}
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
framework
::
ir
::
kFuseStatisAttr
);
for
(
auto
&
item
:
fuse_statis
)
{
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
}
int
num_ops
=
0
;
for
(
auto
&
node
:
analysis_predictor
->
analysis_argument
().
main_dfg
->
nodes
.
nodes
())
{
if
(
node
->
IsFunction
())
{
++
num_ops
;
}
}
LOG
(
INFO
)
<<
"has num ops: "
<<
num_ops
;
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_gru_fuse"
),
4
);
EXPECT_EQ
(
num_ops
,
11
);
}
}
TEST
(
Analyzer_LAC
,
native
)
{
LOG
(
INFO
)
<<
"LAC with native"
;
TestLACPrediction
(
FLAGS_infer_model
,
FLAGS_infer_data
,
FLAGS_batch_size
,
FLAGS_repeat
,
FLAGS_test_all_data
);
}
TEST
(
Analyzer_LAC
,
analysis
)
{
LOG
(
INFO
)
<<
"LAC with analysis"
;
TestLACPrediction
(
FLAGS_infer_model
,
FLAGS_infer_data
,
FLAGS_batch_size
,
FLAGS_repeat
,
FLAGS_test_all_data
,
true
);
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/analyzer_ner_tester.cc
浏览文件 @
ca973139
...
...
@@ -13,12 +13,12 @@
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/
pass
.h"
#include "paddle/fluid/framework/ir/
fuse_pass_base
.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_
api
.h"
#include "paddle/fluid/inference/api/paddle_inference_
pass
.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_string
(
infer_model
,
""
,
"model path"
);
...
...
@@ -112,7 +112,7 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
const
int
chinese_ner_result_data
[]
=
{
30
,
45
,
41
,
48
,
17
,
26
,
48
,
39
,
38
,
16
,
25
};
void
TestChineseNERPrediction
()
{
void
TestChineseNERPrediction
(
bool
use_analysis
)
{
NativeConfig
config
;
config
.
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
config
.
param_file
=
FLAGS_infer_model
+
"/param"
;
...
...
@@ -120,11 +120,23 @@ void TestChineseNERPrediction() {
config
.
device
=
0
;
config
.
specify_input_name
=
true
;
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
std
::
vector
<
PaddleTensor
>
input_slots
;
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
PaddleTensor
>
input_slots
,
outputs
;
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
Timer
timer
;
if
(
use_analysis
)
{
AnalysisConfig
cfg
;
cfg
.
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
.
param_file
=
FLAGS_infer_model
+
"/param"
;
cfg
.
use_gpu
=
false
;
cfg
.
device
=
0
;
cfg
.
specify_input_name
=
true
;
cfg
.
enable_ir_optim
=
true
;
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
cfg
);
}
else
{
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
}
if
(
FLAGS_test_all_data
)
{
LOG
(
INFO
)
<<
"test all data"
;
...
...
@@ -165,10 +177,51 @@ void TestChineseNERPrediction() {
for
(
size_t
i
=
0
;
i
<
std
::
min
(
11UL
,
size
);
i
++
)
{
PADDLE_ENFORCE
(
result
[
i
],
chinese_ner_result_data
[
i
]);
}
if
(
use_analysis
)
{
// run once for comparion as reference
auto
ref_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
std
::
vector
<
PaddleTensor
>
ref_outputs_slots
;
ref_predictor
->
Run
(
input_slots
,
&
ref_outputs_slots
);
EXPECT_EQ
(
ref_outputs_slots
.
size
(),
outputs
.
size
());
auto
&
ref_out
=
ref_outputs_slots
[
0
];
size_t
ref_size
=
std
::
accumulate
(
ref_out
.
shape
.
begin
(),
ref_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
EXPECT_EQ
(
size
,
ref_size
);
int64_t
*
pdata_ref
=
static_cast
<
int64_t
*>
(
ref_out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
EXPECT_EQ
(
pdata_ref
[
i
],
result
[
i
]);
}
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
framework
::
ir
::
kFuseStatisAttr
);
for
(
auto
&
item
:
fuse_statis
)
{
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
}
int
num_ops
=
0
;
for
(
auto
&
node
:
analysis_predictor
->
analysis_argument
().
main_dfg
->
nodes
.
nodes
())
{
if
(
node
->
IsFunction
())
{
++
num_ops
;
}
}
LOG
(
INFO
)
<<
"has num ops: "
<<
num_ops
;
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_gru_fuse"
),
2
);
EXPECT_EQ
(
num_ops
,
14
);
}
}
// Directly infer with the original model.
TEST
(
Analyzer
,
Chinese_ner
)
{
TestChineseNERPrediction
();
}
TEST
(
Analyzer_Chinese_ner
,
native
)
{
TestChineseNERPrediction
(
false
);
}
TEST
(
Analyzer_Chinese_ner
,
analysis
)
{
TestChineseNERPrediction
(
true
);
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
ca973139
...
...
@@ -283,7 +283,6 @@ void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
base_predictor
->
Run
(
input_slots
,
&
base_outputs
);
LOG
(
INFO
)
<<
"===========profile result==========="
;
if
(
num_threads
==
1
)
{
// Prepare inputs.
Timer
timer
;
...
...
@@ -324,7 +323,6 @@ void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
threads
[
i
].
join
();
}
}
LOG
(
INFO
)
<<
"====================================="
;
if
(
use_analysis
&&
activate_ir
)
{
AnalysisPredictor
*
analysis_predictor
=
...
...
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
ca973139
...
...
@@ -45,7 +45,6 @@ endfunction(inference_api_test)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis
)
cc_test
(
test_paddle_inference_api
SRCS api_tester.cc
DEPS paddle_inference_api
)
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
ca973139
...
...
@@ -22,12 +22,25 @@
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool
(
profile
);
namespace
paddle
{
bool
AnalysisPredictor
::
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>&
parent_scope
)
{
VLOG
(
3
)
<<
"Predictor::init()"
;
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
LOG
(
WARNING
)
<<
"Profiler is actived, might affect the performance"
;
LOG
(
INFO
)
<<
"You can turn off by set gflags '-profile false'"
;
auto
tracking_device
=
config_
.
use_gpu
?
platform
::
ProfilerState
::
kAll
:
platform
::
ProfilerState
::
kCPU
;
platform
::
EnableProfiler
(
tracking_device
);
}
#endif
if
(
config_
.
use_gpu
)
{
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
LOG
(
WARNING
)
<<
"ir optimize only supports CPU currently"
;
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
ca973139
...
...
@@ -124,9 +124,9 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
void
PrintTime
(
int
batch_size
,
int
repeat
,
int
num_threads
,
int
tid
,
double
latency
)
{
LOG
(
INFO
)
<<
"batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
LOG
(
INFO
)
<<
"
======
batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
<<
", threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
", latency: "
<<
latency
<<
"ms"
;
<<
", latency: "
<<
latency
<<
"ms
======
"
;
}
}
// namespace inference
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录