Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
ca973139
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
1 年多 前同步成功
通知
696
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
ca973139
编写于
9月 08, 2018
作者:
T
tensor-tang
提交者:
GitHub
9月 08, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #13285 from tensor-tang/refine/ut/lac
add analysis unit test of lac and ner
上级
f90c7865
39c49212
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
486 addition
and
64 deletion
+486
-64
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+2
-0
paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
+203
-0
paddle/fluid/framework/ir/fc_gru_fuse_pass.h
paddle/fluid/framework/ir/fc_gru_fuse_pass.h
+50
-0
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
+6
-5
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+51
-31
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+2
-0
paddle/fluid/inference/analysis/CMakeLists.txt
paddle/fluid/inference/analysis/CMakeLists.txt
+2
-2
paddle/fluid/inference/analysis/analyzer.h
paddle/fluid/inference/analysis/analyzer.h
+2
-1
paddle/fluid/inference/analysis/analyzer_lac_tester.cc
paddle/fluid/inference/analysis/analyzer_lac_tester.cc
+90
-10
paddle/fluid/inference/analysis/analyzer_ner_tester.cc
paddle/fluid/inference/analysis/analyzer_ner_tester.cc
+63
-10
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+0
-2
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+0
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+13
-0
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+2
-2
未找到文件。
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
ca973139
...
...
@@ -31,7 +31,9 @@ pass_library(fc_fuse_pass inference)
pass_library
(
attention_lstm_fuse_pass inference
)
pass_library
(
infer_clean_graph_pass inference
)
pass_library
(
fc_lstm_fuse_pass inference
)
pass_library
(
fc_gru_fuse_pass inference
)
pass_library
(
seq_concat_fc_fuse_pass inference
)
set
(
GLOB_PASS_LIB
${
PASS_LIBRARY
}
CACHE INTERNAL
"Global PASS library"
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
...
...
paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
0 → 100644
浏览文件 @
ca973139
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/fc_gru_fuse_pass.h"
#include <string>
#include "paddle/fluid/framework/lod_tensor.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
static
void
BuildPattern
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
bool
with_fc_bias
)
{
PDNode
*
x
=
pattern
->
NewNode
(
name_scope
,
"x"
)
->
assert_is_op_input
(
"mul"
)
->
assert_var_not_persistable
();
auto
*
fc_out
=
patterns
::
FC
(
pattern
,
name_scope
,
x
,
with_fc_bias
);
fc_out
->
AsIntermediate
();
// fc_out is a tmp var, will be removed after fuse.
patterns
::
GRU
(
pattern
,
name_scope
,
fc_out
);
VLOG
(
3
)
<<
"fc_gru pattern
\n
"
<<
pattern
->
DotString
();
}
static
int
BuildFusion
(
Graph
*
graph
,
const
std
::
string
&
name_scope
,
Scope
*
scope
,
bool
with_fc_bias
)
{
GraphPatternDetector
gpd
;
auto
*
pattern
=
gpd
.
mutable_pattern
();
BuildPattern
(
pattern
,
name_scope
,
with_fc_bias
);
// Create New OpDesc
auto
gru_creater
=
[
&
](
int
gru
,
int
x
,
int
weight_x
,
int
weight_h
,
int
bias
,
int
hidden
,
int
fc_bias
)
{
#define GET_NODE(x) auto* x##_n = graph->RetriveNode(x);
GET_NODE
(
x
);
GET_NODE
(
weight_x
);
GET_NODE
(
weight_h
);
GET_NODE
(
bias
);
GET_NODE
(
hidden
);
GET_NODE
(
gru
);
OpDesc
op_desc
;
op_desc
.
SetType
(
"fusion_gru"
);
#define NEW_NAME(x) name_scope + "/at." #x ".new"
#define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__##_n->Name()});
SET_IN
(
X
,
x
);
SET_IN
(
WeightX
,
weight_x
);
SET_IN
(
WeightH
,
weight_h
);
if
(
with_fc_bias
)
{
op_desc
.
SetInput
(
"Bias"
,
{
NEW_NAME
(
bias
)
+
bias_n
->
Name
()});
}
else
{
SET_IN
(
Bias
,
bias
);
}
#undef SET_IN
op_desc
.
SetInput
(
"H0"
,
{});
op_desc
.
SetOutput
(
"Hidden"
,
{
hidden_n
->
Name
()});
op_desc
.
SetAttr
(
"is_reverse"
,
gru_n
->
Op
()
->
GetAttr
(
"is_reverse"
));
// TODO(TJ): This should be a option for infer
op_desc
.
SetAttr
(
"use_seq"
,
true
);
#define SET_IMTERMEDIATE_OUT(key) op_desc.SetOutput(#key, {NEW_NAME(key)})
SET_IMTERMEDIATE_OUT
(
ReorderedH0
);
SET_IMTERMEDIATE_OUT
(
XX
);
SET_IMTERMEDIATE_OUT
(
BatchedInput
);
SET_IMTERMEDIATE_OUT
(
BatchedOut
);
#undef SET_IMTERMEDIATE_OUT
auto
*
op
=
graph
->
CreateOpNode
(
&
op_desc
);
PADDLE_ENFORCE
(
graph
->
Has
(
kParamScopeAttr
));
auto
*
scope
=
graph
->
Get
<
Scope
*>
(
kParamScopeAttr
);
PADDLE_ENFORCE
(
scope
);
if
(
with_fc_bias
)
{
// Fusion GRU bias = fcbias + grubias
auto
*
fusion_bias_var
=
scope
->
Var
(
NEW_NAME
(
bias
)
+
bias_n
->
Name
());
auto
*
out_bias_tensor
=
fusion_bias_var
->
GetMutable
<
framework
::
LoDTensor
>
();
PADDLE_ENFORCE
(
fusion_bias_var
);
GET_NODE
(
fc_bias
);
PADDLE_ENFORCE
(
fc_bias_n
);
auto
*
gru_bias_var
=
scope
->
FindVar
(
bias_n
->
Name
());
auto
*
fc_bias_var
=
scope
->
FindVar
(
fc_bias_n
->
Name
());
PADDLE_ENFORCE
(
gru_bias_var
);
PADDLE_ENFORCE
(
fc_bias_var
);
const
auto
&
gru_bias_tenosr
=
gru_bias_var
->
Get
<
framework
::
LoDTensor
>
();
const
auto
&
fc_bias_tensor
=
fc_bias_var
->
Get
<
framework
::
LoDTensor
>
();
// new bias = fc bias + gru bias
out_bias_tensor
->
Resize
(
gru_bias_tenosr
.
dims
());
auto
*
data
=
out_bias_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
out_bias_tensor
->
numel
();
i
++
)
{
data
[
i
]
=
fc_bias_tensor
.
data
<
float
>
()[
i
]
+
gru_bias_tenosr
.
data
<
float
>
()[
i
];
}
}
#undef GET_NODE
#define NEW_IMTERMEDIATE_OUT(key) \
scope->Var(NEW_NAME(key))->GetMutable<framework::LoDTensor>()
NEW_IMTERMEDIATE_OUT
(
ReorderedH0
);
NEW_IMTERMEDIATE_OUT
(
XX
);
NEW_IMTERMEDIATE_OUT
(
BatchedInput
);
NEW_IMTERMEDIATE_OUT
(
BatchedOut
);
#undef NEW_NAME
#undef NEW_IMTERMEDIATE_OUT
IR_NODE_LINK_TO
(
x_n
,
op
);
IR_NODE_LINK_TO
(
weight_x_n
,
op
);
IR_NODE_LINK_TO
(
weight_h_n
,
op
);
IR_NODE_LINK_TO
(
bias_n
,
op
);
// actually should link to new bias if have
IR_NODE_LINK_TO
(
op
,
hidden_n
);
// h0?
return
op
;
};
int
fusion_count
{
0
};
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
g
)
{
#define GET_NODE(name__) \
std::string name__##key = name_scope + "/" + #name__; \
auto* name__##n = pattern->RetrieveNode(name__##key); \
PADDLE_ENFORCE(name__##n); \
PADDLE_ENFORCE(subgraph.count(name__##n)); \
Node* name__##_n = subgraph.at(name__##n); \
int name__ __attribute__((unused)) = name__##_n->id();
GET_NODE
(
x
);
GET_NODE
(
w
);
// fc weight
GET_NODE
(
mul
);
GET_NODE
(
fc_out
);
GET_NODE
(
Weight
);
GET_NODE
(
gru
);
GET_NODE
(
Bias
);
GET_NODE
(
Hidden
);
// nodes need be removed
GET_NODE
(
BatchGate
);
GET_NODE
(
BatchResetHiddenPrev
);
GET_NODE
(
BatchHidden
);
if
(
with_fc_bias
)
{
GET_NODE
(
mul_out
);
GET_NODE
(
fc_bias
);
GET_NODE
(
elementwise_add
);
gru_creater
(
gru
,
x
,
w
,
Weight
,
Bias
,
Hidden
,
fc_bias
);
// Remove unneeded nodes.
std
::
unordered_set
<
const
Node
*>
marked_nodes
(
{
mul_n
,
gru_n
,
elementwise_add_n
,
fc_bias_n
,
fc_out_n
,
mul_out_n
,
BatchGate_n
,
BatchResetHiddenPrev_n
,
BatchHidden_n
});
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
}
else
{
gru_creater
(
gru
,
x
,
w
,
Weight
,
Bias
,
Hidden
,
-
1
);
// Remove unneeded nodes.
std
::
unordered_set
<
const
Node
*>
marked_nodes
(
{
mul_n
,
gru_n
,
BatchGate_n
,
BatchResetHiddenPrev_n
,
BatchHidden_n
});
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
}
#undef GET_NODE
++
fusion_count
;
};
gpd
(
graph
,
handler
);
return
fusion_count
;
}
std
::
unique_ptr
<
ir
::
Graph
>
MulGRUFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
FusePassBase
::
Init
(
name_scope_
,
graph
.
get
());
int
fusion_count
=
BuildFusion
(
graph
.
get
(),
name_scope_
,
param_scope
(),
false
/*with_fc_bias*/
);
AddStatis
(
fusion_count
);
return
graph
;
}
std
::
unique_ptr
<
ir
::
Graph
>
FCGRUFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
FusePassBase
::
Init
(
name_scope_
,
graph
.
get
());
int
fusion_count
=
BuildFusion
(
graph
.
get
(),
name_scope_
,
param_scope
(),
true
/*with_fc_bias*/
);
AddStatis
(
fusion_count
);
return
graph
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
mul_gru_fuse_pass
,
paddle
::
framework
::
ir
::
MulGRUFusePass
);
REGISTER_PASS
(
fc_gru_fuse_pass
,
paddle
::
framework
::
ir
::
FCGRUFusePass
);
paddle/fluid/framework/ir/fc_gru_fuse_pass.h
0 → 100644
浏览文件 @
ca973139
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// The MulGRUFusePass and MulGRUFusePass will fuse to the same FusionGRU op.
class
FCGRUFusePass
:
public
FusePassBase
{
public:
virtual
~
FCGRUFusePass
()
{}
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
;
const
std
::
string
name_scope_
{
"fc_gru_fuse"
};
};
// Just FC without bias
class
MulGRUFusePass
:
public
FusePassBase
{
public:
virtual
~
MulGRUFusePass
()
{}
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
;
const
std
::
string
name_scope_
{
"fc_nobias_gru_fuse"
};
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
浏览文件 @
ca973139
...
...
@@ -20,12 +20,13 @@ namespace paddle {
namespace
framework
{
namespace
ir
{
std
::
string
GenNodeName
(
const
std
::
string
&
prefix
,
const
std
::
string
&
name
)
{
static
std
::
string
GenNodeName
(
const
std
::
string
&
prefix
,
const
std
::
string
&
name
)
{
return
prefix
+
"/"
+
name
;
}
void
BuildPattern
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
bool
with_fc_bias
)
{
static
void
BuildPattern
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
bool
with_fc_bias
)
{
PDNode
*
x
=
pattern
->
NewNode
(
name_scope
,
"x"
)
->
assert_is_op_input
(
"mul"
)
->
assert_var_not_persistable
();
...
...
@@ -35,8 +36,8 @@ void BuildPattern(PDPattern* pattern, const std::string& name_scope,
// LOG(INFO) << "\n" << pattern->DotString();
}
int
BuildFusion
(
Graph
*
graph
,
const
std
::
string
&
name_scope
,
Scope
*
scope
,
bool
with_fc_bias
)
{
static
int
BuildFusion
(
Graph
*
graph
,
const
std
::
string
&
name_
scope
,
Scope
*
scope
,
bool
with_fc_bias
)
{
GraphPatternDetector
gpd
;
auto
*
pattern
=
gpd
.
mutable_pattern
();
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
ca973139
...
...
@@ -519,76 +519,96 @@ bool VarLinksFromOp(Node* node, const std::string& op_type) {
PDNode
*
patterns
::
FC
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
PDNode
*
x
,
bool
with_bias
)
{
// Create Operators
PDNode
*
elementwise_add_op
{
nullptr
};
// mul op
auto
*
mul_op
=
pattern
->
NewNode
(
name_scope
,
"mul"
)
->
assert_is_op
(
"mul"
);
if
(
with_bias
)
{
elementwise_add_op
=
pattern
->
NewNode
(
name_scope
,
"elementwise_add"
)
->
assert_is_op
(
"elementwise_add"
);
}
// Create variables
// w
auto
*
mul_weight_var
=
pattern
->
NewNode
(
name_scope
,
"w"
)
->
AsInput
()
->
assert_is_persistable_var
()
->
assert_is_op_nth_input
(
"mul"
,
"Y"
,
0
);
PDNode
*
mul_out_var
{
nullptr
};
->
assert_is_op_input
(
"mul"
,
"Y"
);
PDNode
*
fc_out
{
nullptr
};
if
(
with_bias
)
{
PDNode
*
elementwise_add_op
{
nullptr
};
PDNode
*
mul_out_var
{
nullptr
},
*
bias
{
nullptr
};
elementwise_add_op
=
pattern
->
NewNode
(
name_scope
,
"elementwise_add"
)
->
assert_is_op
(
"elementwise_add"
);
// intermediate variable, will be removed in the IR after fuse.
mul_out_var
=
pattern
->
NewNode
(
name_scope
,
"mul_out"
)
->
AsIntermediate
()
->
assert_is_only_output_of_op
(
"mul"
)
->
assert_is_op_input
(
"elementwise_add"
);
}
PDNode
*
bias
{
nullptr
},
*
fc_out
{
nullptr
};
if
(
with_bias
)
{
// bias
bias
=
pattern
->
NewNode
(
name_scope
,
"fc_bias"
)
->
assert_is_op_input
(
"elementwise_add"
)
->
AsInput
(
);
->
AsInput
(
)
->
assert_is_op_input
(
"elementwise_add"
);
// output
fc_out
=
pattern
->
NewNode
(
name_scope
,
"fc_out"
)
->
AsOutput
()
->
assert_is_op_output
(
"elementwise_add"
);
mul_op
->
LinksFrom
({
x
,
mul_weight_var
}).
LinksTo
({
mul_out_var
});
elementwise_add_op
->
LinksFrom
({
mul_out_var
,
bias
}).
LinksTo
({
fc_out
});
}
else
{
fc_out
=
pattern
->
NewNode
(
name_scope
,
"fc_out"
)
->
AsOutput
()
->
assert_is_op_output
(
"mul"
);
}
if
(
with_bias
)
{
mul_op
->
LinksFrom
({
mul_weight_var
,
x
}).
LinksTo
({
mul_out_var
});
elementwise_add_op
->
LinksFrom
({
mul_out_var
,
bias
}).
LinksTo
({
fc_out
});
}
else
{
mul_op
->
LinksFrom
({
mul_weight_var
,
x
}).
LinksTo
({
fc_out
});
}
return
fc_out
;
}
#define NEW_NODE(op__, arg__, io__) \
auto* arg__ = pattern->NewNode(name_scope, #arg__) \
->assert_is_op_##io__(#op__, #arg__);
PDNode
*
patterns
::
LSTM
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
PDNode
*
x
)
{
x
->
assert_is_op_input
(
"lstm"
,
"Input"
);
auto
*
lstm_op
=
pattern
->
NewNode
(
name_scope
,
"lstm"
)
->
assert_is_op
(
"lstm"
);
#define NEW_NODE(arg__, io__) \
auto* arg__ = pattern->NewNode(name_scope, #arg__) \
->assert_is_op_##io__("lstm", #arg__);
// Currently, the H0 and C0 are optional
// TODO(Superjomn) upgrade the fuse framework to support optional.
// NEW_NODE(H0, input);
// NEW_NODE(C0, input);
NEW_NODE
(
Weight
,
input
);
NEW_NODE
(
Bias
,
input
);
NEW_NODE
(
lstm
,
Weight
,
input
);
NEW_NODE
(
lstm
,
Bias
,
input
);
NEW_NODE
(
Hidden
,
output
);
NEW_NODE
(
Cell
,
output
);
NEW_NODE
(
BatchGate
,
output
);
NEW_NODE
(
BatchCellPreAct
,
output
);
NEW_NODE
(
lstm
,
Hidden
,
output
);
NEW_NODE
(
lstm
,
Cell
,
output
);
NEW_NODE
(
lstm
,
BatchGate
,
output
);
NEW_NODE
(
lstm
,
BatchCellPreAct
,
output
);
lstm_op
->
LinksFrom
({
x
,
Weight
,
Bias
});
lstm_op
->
LinksTo
({
Hidden
,
Cell
,
BatchGate
,
BatchCellPreAct
});
return
Hidden
;
}
PDNode
*
patterns
::
GRU
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
PDNode
*
x
)
{
x
->
assert_is_op_input
(
"gru"
,
"Input"
);
auto
*
gru_op
=
pattern
->
NewNode
(
name_scope
,
"gru"
)
->
assert_is_op
(
"gru"
);
NEW_NODE
(
gru
,
Weight
,
input
);
// TODO(Superjomn): upgrade the fuse framework to support optional.
// H0 and bias are optional
NEW_NODE
(
gru
,
Bias
,
input
);
// also optional
// NEW_NODE(H0, input);
NEW_NODE
(
gru
,
Hidden
,
output
);
// below are intermediate
NEW_NODE
(
gru
,
BatchGate
,
output
);
NEW_NODE
(
gru
,
BatchResetHiddenPrev
,
output
);
NEW_NODE
(
gru
,
BatchHidden
,
output
);
BatchGate
->
AsIntermediate
();
BatchResetHiddenPrev
->
AsIntermediate
();
BatchHidden
->
AsIntermediate
();
gru_op
->
LinksFrom
({
x
,
Weight
,
Bias
});
gru_op
->
LinksTo
({
Hidden
,
BatchGate
,
BatchResetHiddenPrev
,
BatchHidden
});
return
Hidden
;
}
#undef NEW_NODE
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
ca973139
...
...
@@ -298,6 +298,8 @@ PDNode* FC(PDPattern* pattern, const std::string& name_scope, PDNode* x,
PDNode
*
LSTM
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
PDNode
*
x
);
PDNode
*
GRU
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
PDNode
*
x
);
}
// namespace patterns
#define IR_NODE_LINK_TO(a, b) \
...
...
paddle/fluid/inference/analysis/CMakeLists.txt
浏览文件 @
ca973139
...
...
@@ -81,7 +81,7 @@ if (NOT EXISTS ${CHINESE_NER_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE)
endif
()
inference_analysis_test
(
test_analyzer_ner SRCS analyzer_ner_tester.cc
EXTRA_DEPS paddle_inference_api paddle_fluid_api
EXTRA_DEPS paddle_inference_api paddle_fluid_api
analysis_predictor
ARGS --infer_model=
${
CHINESE_NER_INSTALL_DIR
}
/model
--infer_data=
${
CHINESE_NER_INSTALL_DIR
}
/data.txt
)
...
...
@@ -94,7 +94,7 @@ if (NOT EXISTS ${LAC_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE)
endif
()
inference_analysis_test
(
test_analyzer_lac SRCS analyzer_lac_tester.cc
EXTRA_DEPS paddle_inference_api paddle_fluid_api
EXTRA_DEPS paddle_inference_api paddle_fluid_api
ir_pass_manager analysis_predictor
ARGS --infer_model=
${
LAC_INSTALL_DIR
}
/model
--infer_data=
${
LAC_INSTALL_DIR
}
/data.txt
)
...
...
paddle/fluid/inference/analysis/analyzer.h
浏览文件 @
ca973139
...
...
@@ -38,7 +38,6 @@ limitations under the License. */
#include <gflags/gflags.h>
#include <string>
#include <vector>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/flags.h"
#include "paddle/fluid/inference/analysis/pass_manager.h"
...
...
@@ -69,6 +68,8 @@ class Analyzer : public OrderedRegistry<PassManager> {
"attention_lstm_fuse_pass"
,
//
"fc_lstm_fuse_pass"
,
//
"mul_lstm_fuse_pass"
,
//
"fc_gru_fuse_pass"
,
//
"mul_gru_fuse_pass"
,
//
"seq_concat_fc_fuse_pass"
,
//
"fc_fuse_pass"
,
//
}};
...
...
paddle/fluid/inference/analysis/analyzer_lac_tester.cc
浏览文件 @
ca973139
...
...
@@ -11,13 +11,14 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/
pass
.h"
#include "paddle/fluid/framework/ir/
fuse_pass_base
.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_
api
.h"
#include "paddle/fluid/inference/api/paddle_inference_
pass
.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_string
(
infer_model
,
""
,
"model path for LAC"
);
...
...
@@ -102,6 +103,7 @@ struct DataRecord {
return
data
;
}
};
void
GetOneBatch
(
std
::
vector
<
PaddleTensor
>
*
input_slots
,
DataRecord
*
data
,
int
batch_size
)
{
auto
one_batch
=
data
->
NextBatch
();
...
...
@@ -114,6 +116,7 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
PADDLE_ENFORCE_EQ
(
batch_size
,
static_cast
<
int
>
(
one_batch
.
lod
.
size
()
-
1
));
input_slots
->
assign
({
input_tensor
});
}
void
BenchAllData
(
const
std
::
string
&
model_path
,
const
std
::
string
&
data_file
,
const
int
batch_size
,
const
int
repeat
)
{
NativeConfig
config
;
...
...
@@ -141,17 +144,16 @@ void BenchAllData(const std::string &model_path, const std::string &data_file,
}
PrintTime
(
batch_size
,
repeat
,
1
,
0
,
sum
/
repeat
);
}
const
int64_t
lac_ref_data
[]
=
{
24
,
25
,
25
,
25
,
38
,
30
,
31
,
14
,
15
,
44
,
24
,
25
,
25
,
25
,
25
,
25
,
44
,
24
,
25
,
25
,
25
,
36
,
42
,
43
,
44
,
14
,
15
,
44
,
14
,
15
,
44
,
14
,
15
,
44
,
38
,
39
,
14
,
15
,
44
,
22
,
23
,
23
,
23
,
23
,
23
,
23
,
23
};
void
TestLACPrediction
(
const
std
::
string
&
model_path
,
const
std
::
string
&
data_file
,
const
int
batch_size
,
const
int
repeat
,
bool
test_all_data
)
{
if
(
test_all_data
)
{
BenchAllData
(
model_path
,
data_file
,
batch_size
,
repeat
);
return
;
}
const
int
repeat
,
bool
test_all_data
,
bool
use_analysis
=
false
)
{
NativeConfig
config
;
config
.
model_dir
=
model_path
;
config
.
use_gpu
=
false
;
...
...
@@ -160,17 +162,47 @@ void TestLACPrediction(const std::string &model_path,
std
::
vector
<
PaddleTensor
>
input_slots
,
outputs_slots
;
DataRecord
data
(
data_file
,
batch_size
);
GetOneBatch
(
&
input_slots
,
&
data
,
batch_size
);
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
if
(
use_analysis
)
{
AnalysisConfig
cfg
;
cfg
.
model_dir
=
model_path
;
cfg
.
use_gpu
=
false
;
cfg
.
device
=
0
;
cfg
.
specify_input_name
=
true
;
cfg
.
enable_ir_optim
=
true
;
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
cfg
);
}
else
{
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
}
for
(
int
i
=
0
;
i
<
FLAGS_burning
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
}
Timer
timer
;
if
(
test_all_data
)
{
double
sum
=
0
;
LOG
(
INFO
)
<<
"Total number of samples: "
<<
data
.
datasets
.
size
();
for
(
int
i
=
0
;
i
<
repeat
;
i
++
)
{
for
(
size_t
bid
=
0
;
bid
<
data
.
batched_datas
.
size
();
++
bid
)
{
GetOneBatch
(
&
input_slots
,
&
data
,
batch_size
);
timer
.
tic
();
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
sum
+=
timer
.
toc
();
}
}
PrintTime
(
batch_size
,
repeat
,
1
,
0
,
sum
/
repeat
);
LOG
(
INFO
)
<<
"Average latency of each sample: "
<<
sum
/
repeat
/
data
.
datasets
.
size
()
<<
" ms"
;
return
;
}
timer
.
tic
();
for
(
int
i
=
0
;
i
<
repeat
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
}
PrintTime
(
batch_size
,
repeat
,
1
,
0
,
timer
.
toc
()
/
repeat
);
// check result
EXPECT_EQ
(
outputs_slots
.
size
(),
1UL
);
auto
&
out
=
outputs_slots
[
0
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
...
...
@@ -182,12 +214,60 @@ void TestLACPrediction(const std::string &model_path,
for
(
size_t
i
=
0
;
i
<
batch1_size
;
++
i
)
{
EXPECT_EQ
(
pdata
[
i
],
lac_ref_data
[
i
]);
}
if
(
use_analysis
)
{
// run once for comparion as reference
auto
ref_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
std
::
vector
<
PaddleTensor
>
ref_outputs_slots
;
ref_predictor
->
Run
(
input_slots
,
&
ref_outputs_slots
);
EXPECT_EQ
(
ref_outputs_slots
.
size
(),
outputs_slots
.
size
());
auto
&
ref_out
=
ref_outputs_slots
[
0
];
size_t
ref_size
=
std
::
accumulate
(
ref_out
.
shape
.
begin
(),
ref_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
EXPECT_EQ
(
size
,
ref_size
);
int64_t
*
pdata_ref
=
static_cast
<
int64_t
*>
(
ref_out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
EXPECT_EQ
(
pdata_ref
[
i
],
pdata
[
i
]);
}
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
framework
::
ir
::
kFuseStatisAttr
);
for
(
auto
&
item
:
fuse_statis
)
{
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
}
int
num_ops
=
0
;
for
(
auto
&
node
:
analysis_predictor
->
analysis_argument
().
main_dfg
->
nodes
.
nodes
())
{
if
(
node
->
IsFunction
())
{
++
num_ops
;
}
}
LOG
(
INFO
)
<<
"has num ops: "
<<
num_ops
;
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_gru_fuse"
),
4
);
EXPECT_EQ
(
num_ops
,
11
);
}
}
TEST
(
Analyzer_LAC
,
native
)
{
LOG
(
INFO
)
<<
"LAC with native"
;
TestLACPrediction
(
FLAGS_infer_model
,
FLAGS_infer_data
,
FLAGS_batch_size
,
FLAGS_repeat
,
FLAGS_test_all_data
);
}
TEST
(
Analyzer_LAC
,
analysis
)
{
LOG
(
INFO
)
<<
"LAC with analysis"
;
TestLACPrediction
(
FLAGS_infer_model
,
FLAGS_infer_data
,
FLAGS_batch_size
,
FLAGS_repeat
,
FLAGS_test_all_data
,
true
);
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/analyzer_ner_tester.cc
浏览文件 @
ca973139
...
...
@@ -13,12 +13,12 @@
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/
pass
.h"
#include "paddle/fluid/framework/ir/
fuse_pass_base
.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_
api
.h"
#include "paddle/fluid/inference/api/paddle_inference_
pass
.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_string
(
infer_model
,
""
,
"model path"
);
...
...
@@ -112,7 +112,7 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
const
int
chinese_ner_result_data
[]
=
{
30
,
45
,
41
,
48
,
17
,
26
,
48
,
39
,
38
,
16
,
25
};
void
TestChineseNERPrediction
()
{
void
TestChineseNERPrediction
(
bool
use_analysis
)
{
NativeConfig
config
;
config
.
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
config
.
param_file
=
FLAGS_infer_model
+
"/param"
;
...
...
@@ -120,11 +120,23 @@ void TestChineseNERPrediction() {
config
.
device
=
0
;
config
.
specify_input_name
=
true
;
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
std
::
vector
<
PaddleTensor
>
input_slots
;
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
PaddleTensor
>
input_slots
,
outputs
;
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
Timer
timer
;
if
(
use_analysis
)
{
AnalysisConfig
cfg
;
cfg
.
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
.
param_file
=
FLAGS_infer_model
+
"/param"
;
cfg
.
use_gpu
=
false
;
cfg
.
device
=
0
;
cfg
.
specify_input_name
=
true
;
cfg
.
enable_ir_optim
=
true
;
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
cfg
);
}
else
{
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
}
if
(
FLAGS_test_all_data
)
{
LOG
(
INFO
)
<<
"test all data"
;
...
...
@@ -165,10 +177,51 @@ void TestChineseNERPrediction() {
for
(
size_t
i
=
0
;
i
<
std
::
min
(
11UL
,
size
);
i
++
)
{
PADDLE_ENFORCE
(
result
[
i
],
chinese_ner_result_data
[
i
]);
}
if
(
use_analysis
)
{
// run once for comparion as reference
auto
ref_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
std
::
vector
<
PaddleTensor
>
ref_outputs_slots
;
ref_predictor
->
Run
(
input_slots
,
&
ref_outputs_slots
);
EXPECT_EQ
(
ref_outputs_slots
.
size
(),
outputs
.
size
());
auto
&
ref_out
=
ref_outputs_slots
[
0
];
size_t
ref_size
=
std
::
accumulate
(
ref_out
.
shape
.
begin
(),
ref_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
EXPECT_EQ
(
size
,
ref_size
);
int64_t
*
pdata_ref
=
static_cast
<
int64_t
*>
(
ref_out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
EXPECT_EQ
(
pdata_ref
[
i
],
result
[
i
]);
}
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
framework
::
ir
::
kFuseStatisAttr
);
for
(
auto
&
item
:
fuse_statis
)
{
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
}
int
num_ops
=
0
;
for
(
auto
&
node
:
analysis_predictor
->
analysis_argument
().
main_dfg
->
nodes
.
nodes
())
{
if
(
node
->
IsFunction
())
{
++
num_ops
;
}
}
LOG
(
INFO
)
<<
"has num ops: "
<<
num_ops
;
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_gru_fuse"
),
2
);
EXPECT_EQ
(
num_ops
,
14
);
}
}
// Directly infer with the original model.
TEST
(
Analyzer
,
Chinese_ner
)
{
TestChineseNERPrediction
();
}
TEST
(
Analyzer_Chinese_ner
,
native
)
{
TestChineseNERPrediction
(
false
);
}
TEST
(
Analyzer_Chinese_ner
,
analysis
)
{
TestChineseNERPrediction
(
true
);
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
ca973139
...
...
@@ -283,7 +283,6 @@ void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
base_predictor
->
Run
(
input_slots
,
&
base_outputs
);
LOG
(
INFO
)
<<
"===========profile result==========="
;
if
(
num_threads
==
1
)
{
// Prepare inputs.
Timer
timer
;
...
...
@@ -324,7 +323,6 @@ void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
threads
[
i
].
join
();
}
}
LOG
(
INFO
)
<<
"====================================="
;
if
(
use_analysis
&&
activate_ir
)
{
AnalysisPredictor
*
analysis_predictor
=
...
...
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
ca973139
...
...
@@ -45,7 +45,6 @@ endfunction(inference_api_test)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis
)
cc_test
(
test_paddle_inference_api
SRCS api_tester.cc
DEPS paddle_inference_api
)
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
ca973139
...
...
@@ -22,12 +22,25 @@
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool
(
profile
);
namespace
paddle
{
bool
AnalysisPredictor
::
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>&
parent_scope
)
{
VLOG
(
3
)
<<
"Predictor::init()"
;
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
LOG
(
WARNING
)
<<
"Profiler is actived, might affect the performance"
;
LOG
(
INFO
)
<<
"You can turn off by set gflags '-profile false'"
;
auto
tracking_device
=
config_
.
use_gpu
?
platform
::
ProfilerState
::
kAll
:
platform
::
ProfilerState
::
kCPU
;
platform
::
EnableProfiler
(
tracking_device
);
}
#endif
if
(
config_
.
use_gpu
)
{
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
LOG
(
WARNING
)
<<
"ir optimize only supports CPU currently"
;
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
ca973139
...
...
@@ -124,9 +124,9 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
void
PrintTime
(
int
batch_size
,
int
repeat
,
int
num_threads
,
int
tid
,
double
latency
)
{
LOG
(
INFO
)
<<
"batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
LOG
(
INFO
)
<<
"
======
batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
<<
", threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
", latency: "
<<
latency
<<
"ms"
;
<<
", latency: "
<<
latency
<<
"ms
======
"
;
}
}
// namespace inference
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录