Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
3eb55f06
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3eb55f06
编写于
9月 05, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'ups/develop' into refine/op/peephole
上级
d7ac1cc8
6e03f790
变更
31
显示空白变更内容
内联
并排
Showing
31 changed file
with
647 addition
and
378 deletion
+647
-378
doc/fluid/new_docs/user_guides/howto/inference/native_infer.rst
...uid/new_docs/user_guides/howto/inference/native_infer.rst
+3
-5
paddle/fluid/API.spec
paddle/fluid/API.spec
+2
-2
paddle/fluid/framework/details/multi_devices_graph_pass.cc
paddle/fluid/framework/details/multi_devices_graph_pass.cc
+1
-27
paddle/fluid/framework/details/multi_devices_graph_pass.h
paddle/fluid/framework/details/multi_devices_graph_pass.h
+0
-7
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
+22
-10
paddle/fluid/framework/ir/fc_lstm_fuse_pass.h
paddle/fluid/framework/ir/fc_lstm_fuse_pass.h
+2
-0
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+0
-1
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+3
-0
paddle/fluid/inference/analysis/CMakeLists.txt
paddle/fluid/inference/analysis/CMakeLists.txt
+15
-3
paddle/fluid/inference/analysis/analyzer.cc
paddle/fluid/inference/analysis/analyzer.cc
+18
-19
paddle/fluid/inference/analysis/analyzer.h
paddle/fluid/inference/analysis/analyzer.h
+20
-7
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+7
-2
paddle/fluid/inference/analysis/flags.h
paddle/fluid/inference/analysis/flags.h
+22
-0
paddle/fluid/inference/analysis/fluid_to_ir_pass.h
paddle/fluid/inference/analysis/fluid_to_ir_pass.h
+6
-3
paddle/fluid/inference/analysis/test_text_classification.cc
paddle/fluid/inference/analysis/test_text_classification.cc
+109
-0
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+13
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+17
-11
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+4
-2
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+2
-1
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+15
-0
paddle/fluid/operators/auc_op.cc
paddle/fluid/operators/auc_op.cc
+12
-17
paddle/fluid/operators/auc_op.h
paddle/fluid/operators/auc_op.h
+66
-87
paddle/fluid/operators/lookup_table_op.h
paddle/fluid/operators/lookup_table_op.h
+1
-1
paddle/fluid/operators/rmsprop_op.cc
paddle/fluid/operators/rmsprop_op.cc
+24
-1
paddle/fluid/operators/rmsprop_op.h
paddle/fluid/operators/rmsprop_op.h
+17
-4
python/paddle/fluid/layers/metric_op.py
python/paddle/fluid/layers/metric_op.py
+14
-19
python/paddle/fluid/metrics.py
python/paddle/fluid/metrics.py
+32
-44
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+29
-3
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+5
-5
python/paddle/fluid/tests/unittests/test_auc_op.py
python/paddle/fluid/tests/unittests/test_auc_op.py
+9
-13
python/paddle/fluid/tests/unittests/test_rmsprop_op.py
python/paddle/fluid/tests/unittests/test_rmsprop_op.py
+157
-83
未找到文件。
doc/fluid/new_docs/user_guides/howto/inference/native_infer.rst
浏览文件 @
3eb55f06
...
...
@@ -4,13 +4,12 @@ Paddle 预测 API
为了更简单方便的预测部署,Fluid 提供了一套高层 API
用来隐藏底层不同的优化实现。
`预测库相关代码 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/
contrib/inference>`_
_
`预测库相关代码 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/
fluid/inference/api>`
_
包括
- 头文件 ``paddle_inference_api.h`` 定义了所有的接口
- 库文件\ ``libpaddle_fluid.so`` 或 ``libpaddle_fluid.a``
- 库文件 ``libpaddle_inference_api.so`` 或
``libpaddle_inference_api.a``
编译和依赖可以参考 :ref:`install_or_build_cpp_inference_lib` 。
...
...
@@ -97,8 +96,7 @@ engine
CHECK(predictor->Run(slots, &outputs));
// 获取 outputs ...
编译时,联编 ``libpaddle_fluid.a/.so`` 和
``libpaddle_inference_api.a/.so`` 便可。
编译时,联编 ``libpaddle_fluid.a/.so`` 便可。
详细代码参考
------------
...
...
paddle/fluid/API.spec
浏览文件 @
3eb55f06
...
...
@@ -312,7 +312,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kw
paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.polygon_box_transform ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC',
200
, 1))
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC',
4095
, 1))
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.natural_exp_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.inverse_time_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
...
...
@@ -376,7 +376,7 @@ paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ ArgSpec(args=['self', 'l
paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.FtrlOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power'], varargs=None, keywords='kwargs', defaults=(0.0, 0.0, -0.5))
paddle.fluid.optimizer.FtrlOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum'
], varargs=None, keywords='kwargs', defaults=(0.95, 1e-06, 0.0
))
paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum'
, 'centered'], varargs=None, keywords='kwargs', defaults=(0.95, 1e-06, 0.0, False
))
paddle.fluid.optimizer.RMSPropOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdadeltaOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho'], varargs=None, keywords='kwargs', defaults=(1e-06, 0.95))
paddle.fluid.optimizer.AdadeltaOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.cc
浏览文件 @
3eb55f06
...
...
@@ -326,7 +326,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
ir
::
Graph
&
result
=
*
graph
;
for
(
auto
&
node
:
nodes
)
{
if
(
node
->
NodeType
()
==
ir
::
Node
::
Type
::
kVariable
&&
node
->
Var
())
{
if
(
node
->
IsVar
()
&&
node
->
Var
())
{
all_vars_
.
emplace
(
node
->
Name
(),
node
->
Var
());
}
}
...
...
@@ -583,18 +583,6 @@ void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
}
}
bool
MultiDevSSAGraphBuilder
::
IsParameterGradientOnce
(
const
std
::
string
&
og
,
std
::
unordered_set
<
std
::
string
>
*
og_has_been_broadcast
)
const
{
bool
is_pg_once
=
grad_names_
.
count
(
og
)
!=
0
&&
og_has_been_broadcast
->
count
(
og
)
==
0
;
if
(
is_pg_once
)
{
// Insert NCCL AllReduce Op
og_has_been_broadcast
->
insert
(
og
);
}
return
is_pg_once
;
}
int
MultiDevSSAGraphBuilder
::
GetOpDeviceID
(
const
ir
::
Graph
&
graph
,
ir
::
Node
*
node
)
const
{
if
(
strategy_
.
reduce_
!=
BuildStrategy
::
ReduceStrategy
::
kReduce
)
{
...
...
@@ -688,20 +676,6 @@ VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(ir::Graph *result,
return
var
;
}
// Find the first occurence of `prev_op_name` and make current `op` depend
// on it.
void
MultiDevSSAGraphBuilder
::
ConnectOp
(
ir
::
Graph
*
result
,
OpHandleBase
*
op
,
const
std
::
string
&
prev_op_name
)
const
{
for
(
auto
&
prev_op
:
result
->
Get
<
GraphOps
>
(
kGraphOps
))
{
if
(
prev_op
->
Name
()
==
prev_op_name
)
{
auto
*
dep_var
=
new
DummyVarHandle
(
result
->
CreateControlDepVar
());
prev_op
->
AddOutput
(
dep_var
);
result
->
Get
<
GraphDepVars
>
(
kGraphDepVars
).
emplace
(
dep_var
);
op
->
AddInput
(
dep_var
);
}
}
}
void
MultiDevSSAGraphBuilder
::
CreateDistTrainOp
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
)
const
{
int
op_dev_id
=
-
1
;
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.h
浏览文件 @
3eb55f06
...
...
@@ -69,9 +69,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
std
::
vector
<
std
::
string
>
FindDistTrainRecvVars
(
const
std
::
vector
<
ir
::
Node
*>
&
nodes
)
const
;
void
ConnectOp
(
ir
::
Graph
*
result
,
OpHandleBase
*
op
,
const
std
::
string
&
prev_op_name
)
const
;
void
CreateComputationalOps
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
size_t
num_places
)
const
;
...
...
@@ -83,10 +80,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
void
CreateComputationalOp
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
int
dev_id
)
const
;
bool
IsParameterGradientOnce
(
const
std
::
string
&
og
,
std
::
unordered_set
<
std
::
string
>
*
og_has_been_broadcast
)
const
;
int
GetOpDeviceID
(
const
ir
::
Graph
&
graph
,
ir
::
Node
*
node
)
const
;
void
InsertAllReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
)
const
;
...
...
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
浏览文件 @
3eb55f06
...
...
@@ -87,15 +87,24 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
}
op_desc
.
SetInput
(
"Bias"
,
{
new_bias_var
});
}
#undef GET_NODE
// Create temp variables.
scope
->
Var
(
name_scope
+
"/BatchedInput.new"
)
->
GetMutable
<
framework
::
LoDTensor
>
();
scope
->
Var
(
name_scope
+
"/BatchCellPreAct.new"
)
->
GetMutable
<
framework
::
LoDTensor
>
();
scope
->
Var
(
name_scope
+
"/BatchedGate.new"
)
->
GetMutable
<
framework
::
LoDTensor
>
();
op_desc
.
SetInput
(
"H0"
,
{});
op_desc
.
SetInput
(
"C0"
,
{});
op_desc
.
SetOutput
(
"Hidden"
,
{
hidden_n
->
Name
()});
op_desc
.
SetOutput
(
"Cell"
,
{
cell_n
->
Name
()});
op_desc
.
SetOutput
(
"XX"
,
{
xx_n
->
Name
()});
op_desc
.
SetOutput
(
"BatchedInput"
,
{
"blstm_0.tmp_2"
});
op_desc
.
SetOutput
(
"BatchedGate"
,
{
name_scope
+
"/BatchedGate.new"
});
op_desc
.
SetOutput
(
"BatchCellPreAct"
,
{
name_scope
+
"/BatchCellPreAct.new"
});
op_desc
.
SetOutput
(
"BatchedInput"
,
{
name_scope
+
"/BatchedInput.new"
});
op_desc
.
SetAttr
(
"is_reverse"
,
lstm_n
->
Op
()
->
GetAttr
(
"is_reverse"
));
op_desc
.
SetAttr
(
"use_peepholes"
,
lstm_n
->
Op
()
->
GetAttr
(
"use_peepholes"
));
// TODO(TJ): get from attr
...
...
@@ -131,8 +140,8 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
int
fusion_count
{
0
};
auto
fc_no_bias_handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
g
)
{
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
g
)
{
#define GET_NODE(name__) \
std::string name__##key = name_scope + "/" + #name__; \
auto* name__##n = pattern->RetrieveNode(name__##key); \
...
...
@@ -153,21 +162,24 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
if
(
with_fc_bias
)
{
GET_NODE
(
fc_bias
);
GET_NODE
(
elementwise_add
);
lstm_creator
(
lstm
,
x
,
w
,
Weight
,
Bias
,
Hidden
,
Cell
,
fc_out
,
fc_bias
);
// Remove unneeded nodes.
std
::
unordered_set
<
const
Node
*>
marked_nodes
(
{
mul_n
,
lstm_n
,
elementwise_add_n
});
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
}
else
{
lstm_creator
(
lstm
,
x
,
w
,
Weight
,
Bias
,
Hidden
,
Cell
,
fc_out
,
-
1
);
}
#undef GET_NODE
// Remove unneeded nodes.
std
::
unordered_set
<
const
Node
*>
marked_nodes
({
mul_n
,
lstm_n
});
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
}
#undef GET_NODE
++
fusion_count
;
};
gpd
(
graph
,
fc_no_bias_
handler
);
gpd
(
graph
,
handler
);
return
fusion_count
;
}
...
...
paddle/fluid/framework/ir/fc_lstm_fuse_pass.h
浏览文件 @
3eb55f06
...
...
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
3eb55f06
...
...
@@ -73,7 +73,6 @@ void PDPattern::AddEdge(PDNode* a, PDNode* b) {
void
GraphPatternDetector
::
operator
()(
Graph
*
graph
,
GraphPatternDetector
::
handle_t
handler
)
{
if
(
!
MarkPDNodesInGraph
(
*
graph
))
{
LOG
(
INFO
)
<<
"Mark failed"
;
return
;
}
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
3eb55f06
...
...
@@ -19,6 +19,9 @@
#endif
#include <numeric>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/inference/analysis/dot.h"
...
...
paddle/fluid/inference/analysis/CMakeLists.txt
浏览文件 @
3eb55f06
...
...
@@ -74,7 +74,7 @@ inference_analysis_test(test_model_store_pass SRCS model_store_pass_tester.cc)
set
(
CHINESE_NER_MODEL_URL
"http://paddle-inference-dist.bj.bcebos.com/chinese_ner_model.tar.gz"
)
set
(
CHINESE_NER_DATA_URL
"http://paddle-inference-dist.bj.bcebos.com/chinese_ner-data.txt.tar.gz"
)
set
(
CHINESE_NER_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo/chinese_ner"
CACHE PATH
"Chinese ner model and data root."
FORCE
)
if
(
NOT EXISTS
${
CHINESE_NER_INSTALL_DIR
}
AND WITH_TESTING
)
if
(
NOT EXISTS
${
CHINESE_NER_INSTALL_DIR
}
AND WITH_TESTING
AND WITH_INFERENCE
)
inference_download_and_uncompress
(
${
CHINESE_NER_INSTALL_DIR
}
${
CHINESE_NER_MODEL_URL
}
"chinese_ner_model.tar.gz"
)
inference_download_and_uncompress
(
${
CHINESE_NER_INSTALL_DIR
}
${
CHINESE_NER_DATA_URL
}
"chinese_ner-data.txt.tar.gz"
)
endif
()
...
...
@@ -87,7 +87,7 @@ inference_analysis_test(test_analyzer_ner SRCS analyzer_ner_tester.cc
set
(
LAC_MODEL_URL
"http://paddle-inference-dist.bj.bcebos.com/lac_model.tar.gz"
)
set
(
LAC_DATA_URL
"http://paddle-inference-dist.bj.bcebos.com/lac_data.txt.tar.gz"
)
set
(
LAC_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo/lac"
CACHE PATH
"LAC model and data root."
FORCE
)
if
(
NOT EXISTS
${
LAC_INSTALL_DIR
}
AND WITH_TESTING
)
if
(
NOT EXISTS
${
LAC_INSTALL_DIR
}
AND WITH_TESTING
AND WITH_INFERENCE
)
inference_download_and_uncompress
(
${
LAC_INSTALL_DIR
}
${
LAC_MODEL_URL
}
"lac_model.tar.gz"
)
inference_download_and_uncompress
(
${
LAC_INSTALL_DIR
}
${
LAC_DATA_URL
}
"lac_data.txt.tar.gz"
)
endif
()
...
...
@@ -96,3 +96,15 @@ inference_analysis_test(test_analyzer_lac SRCS analyzer_lac_tester.cc
EXTRA_DEPS paddle_inference_api paddle_fluid_api
ARGS --infer_model=
${
LAC_INSTALL_DIR
}
/model
--infer_data=
${
LAC_INSTALL_DIR
}
/data.txt
)
set
(
TEXT_CLASSIFICATION_MODEL_URL
"http://paddle-inference-dist.bj.bcebos.com/text-classification-Senta.tar.gz"
)
set
(
TEXT_CLASSIFICATION_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo/text_classification"
CACHE PATH
"Text Classification model and data root."
FORCE
)
if
(
NOT EXISTS
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
AND WITH_TESTING AND WITH_INFERENCE
)
inference_download_and_uncompress
(
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
${
TEXT_CLASSIFICATION_MODEL_URL
}
"text-classification-Senta.tar.gz"
)
endif
()
inference_analysis_test
(
test_text_classification SRCS test_text_classification.cc
EXTRA_DEPS paddle_inference_api paddle_fluid_api analysis_predictor
ARGS --infer_model=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/text-classification-Senta
)
paddle/fluid/inference/analysis/analyzer.cc
浏览文件 @
3eb55f06
...
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <string>
#include <vector>
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
...
...
@@ -41,20 +42,16 @@ class DfgPassManagerImpl final : public DfgPassManager {
public:
DfgPassManagerImpl
()
{
// TODO(Superjomn) set the key with pass reprs.
LOG
(
INFO
)
<<
"-----------------------------------------------------------------"
;
if
(
FLAGS_IA_enable_ir
)
{
AddPass
(
"fluid-to-ir-pass"
,
new
FluidToIrPass
);
}
else
{
if
(
!
FLAGS_IA_enable_ir
)
{
AddPass
(
"fluid-to-data-flow-graph"
,
new
FluidToDataFlowGraphPass
);
}
else
{
AddPass
(
"fluid-to-ir-pass"
,
new
FluidToIrPass
);
}
TryAddTensorRtPass
();
AddPass
(
"data-flow-graph-to-fluid"
,
new
DataFlowGraphToFluidPass
);
if
(
!
FLAGS_IA_output_storage_path
.
empty
())
{
AddPass
(
"model-store-pass"
,
new
ModelStorePass
);
}
LOG
(
INFO
)
<<
"-----------------------------------------------------------------"
;
}
std
::
string
repr
()
const
override
{
return
"dfg-pass-manager"
;
}
...
...
@@ -101,19 +98,16 @@ class DfgPassManagerImpl final : public DfgPassManager {
Analyzer
::
Analyzer
()
{
Register
(
"manager1"
,
new
DfgPassManagerImpl
);
}
void
Analyzer
::
Run
(
Argument
*
argument
)
{
std
::
vector
<
std
::
string
>
passes
;
for
(
auto
&
pass
:
all_ir_passes_
)
{
if
(
!
disabled_ir_passes_
.
count
(
pass
))
{
passes
.
push_back
(
pass
);
passes
.
push_back
(
"graph_viz_pass"
);
// add graphviz for debug.
}
}
passes
.
push_back
(
"graph_viz_pass"
);
// Ugly support fluid-to-ir-pass
argument
->
Set
(
kFluidToIrPassesAttr
,
new
std
::
vector
<
std
::
string
>
({
// Manual update the passes here.
"graph_viz_pass"
,
//
"infer_clean_graph_pass"
,
"graph_viz_pass"
,
//
"attention_lstm_fuse_pass"
,
"graph_viz_pass"
,
//
"fc_lstm_fuse_pass"
,
"graph_viz_pass"
,
//
"mul_lstm_fuse_pass"
,
"graph_viz_pass"
,
//
"seq_concat_fc_fuse_pass"
,
"graph_viz_pass"
,
//
"fc_fuse_pass"
,
"graph_viz_pass"
//
}));
argument
->
Set
(
kFluidToIrPassesAttr
,
new
std
::
vector
<
std
::
string
>
(
passes
));
for
(
auto
&
x
:
data_
)
{
PADDLE_ENFORCE
(
x
->
Initialize
(
argument
));
...
...
@@ -122,6 +116,11 @@ void Analyzer::Run(Argument* argument) {
}
}
Analyzer
&
Analyzer
::
DisableIrPasses
(
const
std
::
vector
<
std
::
string
>&
passes
)
{
disabled_ir_passes_
.
insert
(
passes
.
begin
(),
passes
.
end
());
return
*
this
;
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/analyzer.h
浏览文件 @
3eb55f06
...
...
@@ -36,16 +36,10 @@ limitations under the License. */
*/
#include <gflags/gflags.h>
#include "paddle/fluid/inference/analysis/flags.h"
#include "paddle/fluid/inference/analysis/pass.h"
#include "paddle/fluid/inference/analysis/pass_manager.h"
// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this
// flag if not available.
DECLARE_bool
(
IA_enable_tensorrt_subgraph_engine
);
DECLARE_string
(
IA_graphviz_log_root
);
DECLARE_string
(
IA_output_storage_path
);
DECLARE_bool
(
IA_enable_ir
);
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
...
...
@@ -57,7 +51,26 @@ class Analyzer : public OrderedRegistry<PassManager> {
void
Run
(
Argument
*
argument
);
Analyzer
&
DisableIrPasses
(
const
std
::
vector
<
std
::
string
>&
passes
);
DISABLE_COPY_AND_ASSIGN
(
Analyzer
);
private:
// All avaiable IR passes.
// The bigger fuse comes first, so that the small operators prefer to be
// merged in a larger fuse op. The small fusion will not break the pattern of
// larger fusion.
const
std
::
vector
<
std
::
string
>
all_ir_passes_
{{
// Manual update the passes here.
"infer_clean_graph_pass"
,
//
"attention_lstm_fuse_pass"
,
//
"fc_lstm_fuse_pass"
,
//
"mul_lstm_fuse_pass"
,
//
"seq_concat_fc_fuse_pass"
,
//
"fc_fuse_pass"
,
//
}};
std
::
unordered_set
<
std
::
string
>
disabled_ir_passes_
;
};
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
3eb55f06
...
...
@@ -271,17 +271,22 @@ void TestDituRNNPrediction(const std::string &model_path,
const
std
::
string
&
data_path
,
int
batch_size
,
bool
use_analysis
,
bool
activate_ir
,
int
num_times
=
1
)
{
Native
Config
config
;
Analysis
Config
config
;
config
.
prog_file
=
FLAGS_infer_ditu_rnn_model
+
"/__model__"
;
config
.
param_file
=
FLAGS_infer_ditu_rnn_model
+
"/param"
;
config
.
use_gpu
=
false
;
config
.
device
=
0
;
config
.
specify_input_name
=
true
;
config
.
enable_ir_optim
=
activate_ir
;
PADDLE_ENFORCE
(
config
.
ir_mode
==
AnalysisConfig
::
IrPassMode
::
kExclude
);
// default
config
.
ir_passes
.
clear
();
// Do not exclude any pass.
auto
base_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
std
::
vector
<
PaddleTensor
>
input_slots
;
DataRecord
data
(
data_path
,
batch_size
);
// Prepare inputs.
...
...
paddle/fluid/inference/analysis/flags.h
0 → 100644
浏览文件 @
3eb55f06
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this
// flag if not available.
DECLARE_bool
(
IA_enable_tensorrt_subgraph_engine
);
DECLARE_string
(
IA_graphviz_log_root
);
DECLARE_string
(
IA_output_storage_path
);
DECLARE_bool
(
IA_enable_ir
);
paddle/fluid/inference/analysis/fluid_to_ir_pass.h
浏览文件 @
3eb55f06
...
...
@@ -15,6 +15,7 @@
#pragma once
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/flags.h"
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
#include "paddle/fluid/inference/analysis/pass.h"
...
...
@@ -85,9 +86,11 @@ class FluidToIrPass final : public DataFlowGraphPass {
new
Scope
*
(
&
argument_
->
Get
<
Scope
>
(
ir
::
kParamScopeAttr
)));
}
if
(
FLAGS_IA_enable_ir
)
{
const
auto
&
ir_passes_to_apply
=
argument_
->
Get
<
std
::
vector
<
std
::
string
>>
(
kFluidToIrPassesAttr
);
ir_passes
.
Apply
(
ir_passes_to_apply
);
}
PADDLE_ENFORCE
(
argument_
->
main_dfg
.
get
());
argument_
->
main_dfg
->
Build
(
ir_passes
.
graph
());
...
...
paddle/fluid/inference/analysis/test_text_classification.cc
0 → 100644
浏览文件 @
3eb55f06
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <glog/logging.h> // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/timer.h"
DEFINE_string
(
infer_model
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
infer_data
,
""
,
"Path of the dataset."
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"How many times to repeat run."
);
namespace
paddle
{
template
<
typename
T
>
std
::
string
to_string
(
const
std
::
vector
<
T
>
&
vec
)
{
std
::
stringstream
ss
;
for
(
const
auto
&
c
:
vec
)
{
ss
<<
c
<<
" "
;
}
return
ss
.
str
();
}
void
PrintTime
(
const
double
latency
,
const
int
bs
,
const
int
repeat
)
{
LOG
(
INFO
)
<<
"===========profile result==========="
;
LOG
(
INFO
)
<<
"batch_size: "
<<
bs
<<
", repeat: "
<<
repeat
<<
", avg latency: "
<<
latency
/
repeat
<<
"ms"
;
LOG
(
INFO
)
<<
"====================================="
;
}
void
Main
(
int
batch_size
)
{
// Three sequence inputs.
std
::
vector
<
PaddleTensor
>
input_slots
(
1
);
// one batch starts
// data --
int64_t
data0
[]
=
{
0
,
1
,
2
};
for
(
auto
&
input
:
input_slots
)
{
input
.
data
.
Reset
(
data0
,
sizeof
(
data0
));
input
.
shape
=
std
::
vector
<
int
>
({
3
,
1
});
// dtype --
input
.
dtype
=
PaddleDType
::
INT64
;
// LoD --
input
.
lod
=
std
::
vector
<
std
::
vector
<
size_t
>>
({{
0
,
3
}});
}
// shape --
// Create Predictor --
AnalysisConfig
config
;
config
.
model_dir
=
FLAGS_infer_model
;
config
.
use_gpu
=
false
;
config
.
enable_ir_optim
=
true
;
config
.
ir_passes
.
push_back
(
"fc_lstm_fuse_pass"
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
inference
::
Timer
timer
;
double
sum
=
0
;
std
::
vector
<
PaddleTensor
>
output_slots
;
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
timer
.
tic
();
CHECK
(
predictor
->
Run
(
input_slots
,
&
output_slots
));
sum
+=
timer
.
toc
();
}
PrintTime
(
sum
,
batch_size
,
FLAGS_repeat
);
// Get output
LOG
(
INFO
)
<<
"get outputs "
<<
output_slots
.
size
();
for
(
auto
&
output
:
output_slots
)
{
LOG
(
INFO
)
<<
"output.shape: "
<<
to_string
(
output
.
shape
);
// no lod ?
CHECK_EQ
(
output
.
lod
.
size
(),
0UL
);
LOG
(
INFO
)
<<
"output.dtype: "
<<
output
.
dtype
;
std
::
stringstream
ss
;
for
(
int
i
=
0
;
i
<
5
;
i
++
)
{
ss
<<
static_cast
<
float
*>
(
output
.
data
.
data
())[
i
]
<<
" "
;
}
LOG
(
INFO
)
<<
"output.data summary: "
<<
ss
.
str
();
// one batch ends
}
}
TEST
(
text_classification
,
basic
)
{
Main
(
FLAGS_batch_size
);
}
}
// namespace paddle
USE_PASS
(
fc_fuse_pass
);
USE_PASS
(
seq_concat_fc_fuse_pass
);
USE_PASS
(
fc_lstm_fuse_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
infer_clean_graph_pass
);
USE_PASS
(
attention_lstm_fuse_pass
);
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
3eb55f06
...
...
@@ -44,7 +44,19 @@ function(inference_api_test TARGET_NAME)
endfunction
(
inference_api_test
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api
analysis
ir_pass_manager
pass
fc_fuse_pass
fc_lstm_fuse_pass
seq_concat_fc_fuse_pass
graph_viz_pass
infer_clean_graph_pass
graph_pattern_detector
infer_clean_graph_pass
attention_lstm_fuse_pass
)
cc_test
(
test_paddle_inference_api
SRCS api_tester.cc
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
3eb55f06
...
...
@@ -14,6 +14,8 @@
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/scope.h"
...
...
@@ -28,6 +30,8 @@ bool AnalysisPredictor::Init(
VLOG
(
3
)
<<
"Predictor::init()"
;
if
(
config_
.
use_gpu
)
{
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
LOG
(
WARNING
)
<<
"ir optimize only supports CPU currently"
;
config_
.
enable_ir_optim
=
false
;
}
else
{
place_
=
paddle
::
platform
::
CPUPlace
();
}
...
...
@@ -73,7 +77,7 @@ bool AnalysisPredictor::Init(
void
AnalysisPredictor
::
OptimizeInferenceProgram
()
{
LOG
(
INFO
)
<<
"optimize begin"
;
FLAGS_IA_enable_ir
=
true
;
FLAGS_IA_enable_ir
=
config_
.
enable_ir_optim
;
FLAGS_IA_enable_tensorrt_subgraph_engine
=
false
;
FLAGS_IA_output_storage_path
=
""
;
// Don't output the model.
// Analyze inference_program
...
...
@@ -90,24 +94,26 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}
argument_
.
origin_program_desc
.
reset
(
new
ProgramDesc
(
*
inference_program_
->
Proto
()));
Analyzer
().
Run
(
&
argument_
);
PADDLE_ENFORCE
(
config_
.
ir_mode
==
AnalysisConfig
::
IrPassMode
::
kExclude
,
"Only kExclude is supported yet."
);
Analyzer
().
DisableIrPasses
(
config_
.
ir_passes
).
Run
(
&
argument_
);
CHECK
(
argument_
.
transformed_program_desc
);
VLOG
(
5
)
<<
"to prepare executor"
;
// LOG(INFO) << "transformed_parogram_desc " <<
// argument.transformed_program_desc->DebugString();
inference_program_
.
reset
(
new
framework
::
ProgramDesc
(
*
argument_
.
transformed_program_desc
));
PADDLE_ENFORCE
(
argument_
.
Has
(
framework
::
ir
::
kParamScopeAttr
));
if
(
argument_
.
Has
(
framework
::
ir
::
kParamScopeAttr
))
{
// Update scope.
scope_
.
reset
(
argument_
.
Release
<
framework
::
Scope
>
(
framework
::
ir
::
kParamScopeAttr
));
LOG
(
INFO
)
<<
"optimize end =="
;
}
LOG
(
INFO
)
<<
"== optimize end =="
;
}
template
<
>
std
::
unique_ptr
<
PaddlePredictor
>
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kAnalysis
>
(
const
Native
Config
&
config
)
{
VLOG
(
3
)
<<
"create
NativePredictor
"
;
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
const
Analysis
Config
&
config
)
{
VLOG
(
3
)
<<
"create
AnalysisConfig
"
;
if
(
config
.
use_gpu
)
{
// 1. GPU memeroy
PADDLE_ENFORCE_GT
(
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
3eb55f06
...
...
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
...
...
@@ -28,7 +30,7 @@ using framework::proto::ProgramDesc;
*/
class
AnalysisPredictor
:
public
NativePaddlePredictor
{
public:
explicit
AnalysisPredictor
(
const
Native
Config
&
config
)
explicit
AnalysisPredictor
(
const
Analysis
Config
&
config
)
:
NativePaddlePredictor
(
config
),
config_
(
config
)
{}
bool
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>&
parent_scope
);
...
...
@@ -44,7 +46,7 @@ class AnalysisPredictor : public NativePaddlePredictor {
Argument
&
analysis_argument
()
{
return
argument_
;
}
private:
Native
Config
config_
;
Analysis
Config
config_
;
Argument
argument_
;
};
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
3eb55f06
...
...
@@ -176,7 +176,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
framework
::
Scope
*
scope
)
{
VLOG
(
3
)
<<
"Predictor::set_feed"
;
if
(
inputs
.
size
()
!=
feeds_
.
size
())
{
LOG
(
ERROR
)
<<
"wrong feed input size."
;
LOG
(
ERROR
)
<<
"wrong feed input size, need "
<<
feeds_
.
size
()
<<
" but get "
<<
inputs
.
size
();
return
false
;
}
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
...
...
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
3eb55f06
...
...
@@ -150,6 +150,21 @@ struct TensorRTConfig : public NativeConfig {
int
workspace_size
{
1
<<
30
};
};
// NOTE WIP, not stable yet.
struct
AnalysisConfig
:
public
NativeConfig
{
//
enum
class
IrPassMode
{
kSystem
,
// Use system default passes, not customize.
kInclude
,
// Specify the passes in `ir_passes`.
kExclude
// Specify the disabled passes in `ir_passes`.
};
bool
enable_ir_optim
=
true
;
IrPassMode
ir_mode
{
IrPassMode
::
kExclude
};
// attention lstm fuse works only on some specific models, disable as default.
std
::
vector
<
std
::
string
>
ir_passes
{
"attention_lstm_fuse_pass"
};
};
// A factory to help create different predictors.
//
// FOR EXTENSION DEVELOPER:
...
...
paddle/fluid/operators/auc_op.cc
浏览文件 @
3eb55f06
...
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/auc_op.h"
#include <string>
namespace
paddle
{
namespace
operators
{
...
...
@@ -36,15 +35,12 @@ class AucOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ
(
predict_height
,
label_height
,
"Out and Label should have same height."
);
int
num_
thres
=
ctx
->
Attrs
().
Get
<
int
>
(
"num_thresholds"
)
;
int
num_
pred_buckets
=
ctx
->
Attrs
().
Get
<
int
>
(
"num_thresholds"
)
+
1
;
ctx
->
SetOutputDim
(
"AUC"
,
{
1
});
ctx
->
SetOutputDim
(
"TPOut"
,
{
num_thres
});
ctx
->
SetOutputDim
(
"TNOut"
,
{
num_thres
});
ctx
->
SetOutputDim
(
"FPOut"
,
{
num_thres
});
ctx
->
SetOutputDim
(
"FNOut"
,
{
num_thres
});
ctx
->
ShareLoD
(
"Predict"
,
/*->*/
"AUC"
);
ctx
->
SetOutputDim
(
"BatchAUC"
,
{
1
});
ctx
->
SetOutputDim
(
"StatPosOut"
,
{
num_pred_buckets
});
ctx
->
SetOutputDim
(
"StatNegOut"
,
{
num_pred_buckets
});
}
protected:
...
...
@@ -66,25 +62,24 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"Label"
,
"A 2D int tensor indicating the label of the training data. "
"shape: [batch_size, 1]"
);
AddInput
(
"TP"
,
"True-Positive value."
);
AddInput
(
"FP"
,
"False-Positive value."
);
AddInput
(
"TN"
,
"True-Negative value."
);
AddInput
(
"FN"
,
"False-Negative value."
);
// TODO(typhoonzero): support weight input
AddInput
(
"StatPos"
,
"Statistic value when label = 1"
);
AddInput
(
"StatNeg"
,
"Statistic value when label = 0"
);
AddOutput
(
"AUC"
,
"A scalar representing the "
"current area-under-the-curve."
);
AddOutput
(
"TPOut"
,
"True-Positive value."
);
AddOutput
(
"FPOut"
,
"False-Positive value."
);
AddOutput
(
"TNOut"
,
"True-Negative value."
);
AddOutput
(
"FNOut"
,
"False-Negative value."
);
AddOutput
(
"BatchAUC"
,
"The AUC for current batch"
);
AddOutput
(
"StatPosOut"
,
"Statistic value when label = 1"
);
AddOutput
(
"StatNegOut"
,
"Statistic value when label = 0"
);
AddAttr
<
std
::
string
>
(
"curve"
,
"Curve type, can be 'ROC' or 'PR'."
)
.
SetDefault
(
"ROC"
);
AddAttr
<
int
>
(
"num_thresholds"
,
"The number of thresholds to use when discretizing the"
" roc curve."
)
.
SetDefault
(
200
);
.
SetDefault
(
(
2
<<
12
)
-
1
);
AddComment
(
R"DOC(
Area Under The Curve (AUC) Operator.
...
...
paddle/fluid/operators/auc_op.h
浏览文件 @
3eb55f06
...
...
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
...
...
@@ -23,106 +23,85 @@ namespace operators {
using
Tensor
=
framework
::
Tensor
;
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenVector
=
framework
::
EigenVector
<
T
,
MajorType
,
IndexType
>
;
template
<
typename
DeviceContext
,
typename
T
>
class
AucKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
predict
=
ctx
.
Input
<
Tensor
>
(
"Predict"
);
auto
*
label
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
auto
*
auc
=
ctx
.
Output
<
Tensor
>
(
"AUC"
);
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
predict
=
ctx
.
Input
<
Tensor
>
(
"Predict"
);
auto
*
label
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
std
::
string
curve
=
ctx
.
Attr
<
std
::
string
>
(
"curve"
);
int
num_thresholds
=
ctx
.
Attr
<
int
>
(
"num_thresholds"
);
int
num_pred_buckets
=
num_thresholds
+
1
;
// Only use output var for now, make sure it's persistable and
// not cleaned up for each batch.
auto
*
true_positive
=
ctx
.
Output
<
Tensor
>
(
"TPOut"
);
auto
*
false_positive
=
ctx
.
Output
<
Tensor
>
(
"FPOut"
);
auto
*
true_negative
=
ctx
.
Output
<
Tensor
>
(
"TNOut"
);
auto
*
false_negative
=
ctx
.
Output
<
Tensor
>
(
"FNOut"
);
auto
*
auc
=
ctx
.
Output
<
Tensor
>
(
"AUC"
);
auto
*
stat_pos
=
ctx
.
Output
<
Tensor
>
(
"StatPosOut"
);
auto
*
stat_neg
=
ctx
.
Output
<
Tensor
>
(
"StatNegOut"
);
auto
*
auc_data
=
auc
->
mutable_data
<
double
>
(
ctx
.
GetPlace
());
auto
*
stat_pos_data
=
stat_pos
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
auto
*
stat_neg_data
=
stat_neg
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
calcAuc
(
ctx
,
label
,
predict
,
stat_pos_data
,
stat_neg_data
,
num_thresholds
,
auc
);
std
::
string
curve
=
ctx
.
Attr
<
std
::
string
>
(
"curve"
);
int
num_thresholds
=
ctx
.
Attr
<
int
>
(
"num_thresholds"
);
std
::
vector
<
double
>
thresholds_list
;
thresholds_list
.
reserve
(
num_thresholds
);
for
(
int
i
=
1
;
i
<
num_thresholds
-
1
;
i
++
)
{
thresholds_list
[
i
]
=
static_cast
<
double
>
(
i
)
/
(
num_thresholds
-
1
);
auto
*
batch_auc
=
ctx
.
Output
<
Tensor
>
(
"BatchAUC"
);
std
::
vector
<
int64_t
>
stat_pos_batch
(
num_pred_buckets
,
0
);
std
::
vector
<
int64_t
>
stat_neg_batch
(
num_pred_buckets
,
0
);
calcAuc
(
ctx
,
label
,
predict
,
stat_pos_batch
.
data
(),
stat_neg_batch
.
data
(),
num_thresholds
,
batch_auc
);
}
private:
inline
static
double
trapezoidArea
(
double
X1
,
double
X2
,
double
Y1
,
double
Y2
)
{
return
(
X1
>
X2
?
(
X1
-
X2
)
:
(
X2
-
X1
))
*
(
Y1
+
Y2
)
/
2.0
;
}
const
double
kEpsilon
=
1e-7
;
thresholds_list
[
0
]
=
0.0
f
-
kEpsilon
;
thresholds_list
[
num_thresholds
-
1
]
=
1.0
f
+
kEpsilon
;
inline
static
void
calcAuc
(
const
framework
::
ExecutionContext
&
ctx
,
const
framework
::
Tensor
*
label
,
const
framework
::
Tensor
*
predict
,
int64_t
*
stat_pos
,
int64_t
*
stat_neg
,
int
num_thresholds
,
framework
::
Tensor
*
auc_tensor
)
{
size_t
batch_size
=
predict
->
dims
()[
0
];
size_t
inference_width
=
predict
->
dims
()[
1
];
const
T
*
inference_data
=
predict
->
data
<
T
>
();
const
auto
*
label_data
=
label
->
data
<
int64_t
>
();
const
T
*
inference_data
=
predict
->
data
<
T
>
();
const
auto
*
label_data
=
label
->
data
<
int64_t
>
();
auto
*
tp_data
=
true_positive
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
auto
*
fn_data
=
false_negative
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
auto
*
tn_data
=
true_negative
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
auto
*
fp_data
=
false_positive
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
auto
*
auc
=
auc_tensor
->
mutable_data
<
double
>
(
ctx
.
GetPlace
());
for
(
int
idx_thresh
=
0
;
idx_thresh
<
num_thresholds
;
idx_thresh
++
)
{
// calculate TP, FN, TN, FP for current thresh
int64_t
tp
=
0
,
fn
=
0
,
tn
=
0
,
fp
=
0
;
for
(
size_t
i
=
0
;
i
<
batch_size
;
i
++
)
{
// NOTE: label_data used as bool, labels > 0 will be treated as true.
uint32_t
binIdx
=
static_cast
<
uint32_t
>
(
inference_data
[
i
*
inference_width
+
1
]
*
num_thresholds
);
if
(
label_data
[
i
])
{
if
(
inference_data
[
i
*
inference_width
+
1
]
>=
(
thresholds_list
[
idx_thresh
]))
{
tp
++
;
}
else
{
fn
++
;
}
}
else
{
if
(
inference_data
[
i
*
inference_width
+
1
]
>=
(
thresholds_list
[
idx_thresh
]))
{
fp
++
;
stat_pos
[
binIdx
]
+=
1.0
;
}
else
{
tn
++
;
}
}
}
// store rates
tp_data
[
idx_thresh
]
+=
tp
;
fn_data
[
idx_thresh
]
+=
fn
;
tn_data
[
idx_thresh
]
+=
tn
;
fp_data
[
idx_thresh
]
+=
fp
;
}
// epsilon to avoid divide by zero.
double
epsilon
=
1e-6
;
// Riemann sum to caculate auc.
Tensor
tp_rate
,
fp_rate
,
rec_rate
;
tp_rate
.
Resize
({
num_thresholds
});
fp_rate
.
Resize
({
num_thresholds
});
rec_rate
.
Resize
({
num_thresholds
});
auto
*
tp_rate_data
=
tp_rate
.
mutable_data
<
double
>
(
ctx
.
GetPlace
());
auto
*
fp_rate_data
=
fp_rate
.
mutable_data
<
double
>
(
ctx
.
GetPlace
());
auto
*
rec_rate_data
=
rec_rate
.
mutable_data
<
double
>
(
ctx
.
GetPlace
());
for
(
int
i
=
0
;
i
<
num_thresholds
;
i
++
)
{
tp_rate_data
[
i
]
=
(
static_cast
<
double
>
(
tp_data
[
i
])
+
epsilon
)
/
(
tp_data
[
i
]
+
fn_data
[
i
]
+
epsilon
);
fp_rate_data
[
i
]
=
static_cast
<
double
>
(
fp_data
[
i
])
/
(
fp_data
[
i
]
+
tn_data
[
i
]
+
epsilon
);
rec_rate_data
[
i
]
=
(
static_cast
<
double
>
(
tp_data
[
i
])
+
epsilon
)
/
(
tp_data
[
i
]
+
fp_data
[
i
]
+
epsilon
);
stat_neg
[
binIdx
]
+=
1.0
;
}
*
auc_data
=
0.0
f
;
if
(
curve
==
"ROC"
)
{
for
(
int
i
=
0
;
i
<
num_thresholds
-
1
;
i
++
)
{
auto
dx
=
fp_rate_data
[
i
]
-
fp_rate_data
[
i
+
1
];
auto
y
=
(
tp_rate_data
[
i
]
+
tp_rate_data
[
i
+
1
])
/
2.0
f
;
*
auc_data
=
*
auc_data
+
dx
*
y
;
}
}
else
if
(
curve
==
"PR"
)
{
for
(
int
i
=
1
;
i
<
num_thresholds
;
i
++
)
{
auto
dx
=
tp_rate_data
[
i
]
-
tp_rate_data
[
i
-
1
];
auto
y
=
(
rec_rate_data
[
i
]
+
rec_rate_data
[
i
-
1
])
/
2.0
f
;
*
auc_data
=
*
auc_data
+
dx
*
y
;
*
auc
=
0.0
f
;
double
totPos
=
0.0
;
double
totNeg
=
0.0
;
double
totPosPrev
=
0.0
;
double
totNegPrev
=
0.0
;
int
idx
=
num_thresholds
;
while
(
idx
>=
0
)
{
totPosPrev
=
totPos
;
totNegPrev
=
totNeg
;
totPos
+=
stat_pos
[
idx
];
totNeg
+=
stat_neg
[
idx
];
*
auc
+=
trapezoidArea
(
totNeg
,
totNegPrev
,
totPos
,
totPosPrev
);
--
idx
;
}
if
(
totPos
>
0.0
&&
totNeg
>
0.0
)
{
*
auc
=
*
auc
/
totPos
/
totNeg
;
}
}
};
...
...
paddle/fluid/operators/lookup_table_op.h
浏览文件 @
3eb55f06
...
...
@@ -57,7 +57,7 @@ class LookupTableKernel : public framework::OpKernel<T> {
memset
(
output
+
i
*
row_width
,
0
,
row_width
*
sizeof
(
T
));
}
else
{
PADDLE_ENFORCE_LT
(
ids
[
i
],
row_number
);
PADDLE_ENFORCE_GE
(
ids
[
i
],
0
);
PADDLE_ENFORCE_GE
(
ids
[
i
],
0
,
"ids %d"
,
i
);
memcpy
(
output
+
i
*
row_width
,
table
+
ids
[
i
]
*
row_width
,
row_width
*
sizeof
(
T
));
}
...
...
paddle/fluid/operators/rmsprop_op.cc
浏览文件 @
3eb55f06
...
...
@@ -36,9 +36,13 @@ class RmspropOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
"Output(param_out) of RmspropOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"MomentOut"
),
"Output(Moment
um_o
ut) of RmspropOp should not be null."
);
"Output(Moment
O
ut) of RmspropOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"MeanSquareOut"
),
"Output(MeanSquareOut) of RmspropOp should not be null."
);
if
(
ctx
->
Attrs
().
Get
<
bool
>
(
"centered"
))
{
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"MeanGradOut"
),
"Output(MeanGradOut) of RmspropOp should not be null."
);
}
auto
param_dim
=
ctx
->
GetInputDim
(
"Param"
);
PADDLE_ENFORCE_EQ
(
...
...
@@ -58,6 +62,9 @@ class RmspropOp : public framework::OperatorWithKernel {
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"MomentOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"MeanSquareOut"
,
param_dim
);
if
(
ctx
->
Attrs
().
Get
<
bool
>
(
"centered"
))
{
ctx
->
SetOutputDim
(
"MeanGradOut"
,
param_dim
);
}
}
};
...
...
@@ -70,6 +77,10 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"MeanSquare"
,
"(Tensor, default Tensor<float>)"
" The mean square value that gets updated."
);
AddInput
(
"MeanGrad"
,
"(Tensor, default Tensor<float>)"
" The moving average of gradient"
)
.
AsDispensable
();
AddInput
(
"LearningRate"
,
"(Tensor, default Tensor<float>) "
"The learning rate should be a tensor of size 1."
);
...
...
@@ -82,6 +93,8 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"ParamOut"
,
"(Tensor) Output updated parameter value."
);
AddOutput
(
"MomentOut"
,
"(Tensor) Output updated moment."
);
AddOutput
(
"MeanSquareOut"
,
"(Tensor) Output Mean squared updated value."
);
AddOutput
(
"MeanGradOut"
,
"(Tensor) Output moving average of gradient updated value."
);
AddAttr
<
float
>
(
"epsilon"
,
"(float, default 1e-10) Constant "
...
...
@@ -93,6 +106,8 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
.
SetDefault
(
0.9
f
);
AddAttr
<
float
>
(
"momentum"
,
"(float, default 0.0) Constant value."
)
.
SetDefault
(
0.0
f
);
AddAttr
<
bool
>
(
"centered"
,
"(bool, default false) use centered rmsprop."
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
Rmsprop Optimizer.
...
...
@@ -103,6 +118,14 @@ MomentOut = momentum * Moment +
ParamOut = Param - MomentOut
$$
if centered is true:
mean_grad = decay * mean_square{t-1} + (1-decay) * gradient
mean_square = decay * mean_square{t-1} + (1-decay) * gradient ** 2
mom = momentum * mom{t-1} + learning_rate * g_t /
sqrt(mean_square - mean_grad**2 + epsilon)
param -= mom
The original slides that proposed Rmsprop: Slide 29 of
http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
...
...
paddle/fluid/operators/rmsprop_op.h
浏览文件 @
3eb55f06
...
...
@@ -41,6 +41,7 @@ class RmspropOpKernel : public framework::OpKernel<T> {
float
epsilon
=
ctx
.
Attr
<
float
>
(
"epsilon"
);
float
rho
=
ctx
.
Attr
<
float
>
(
"decay"
);
float
momentum
=
ctx
.
Attr
<
float
>
(
"momentum"
);
bool
centered
=
ctx
.
Attr
<
bool
>
(
"centered"
);
auto
p
=
EigenVector
<
T
>::
Flatten
(
*
ctx
.
Input
<
Tensor
>
(
"Param"
));
auto
ms
=
EigenVector
<
T
>::
Flatten
(
*
ctx
.
Input
<
Tensor
>
(
"MeanSquare"
));
...
...
@@ -53,12 +54,24 @@ class RmspropOpKernel : public framework::OpKernel<T> {
auto
ms_out
=
EigenVector
<
T
>::
Flatten
(
*
mean_square_out
);
auto
&
place
=
*
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
Eigen
::
DSizes
<
int
,
1
>
grad_dsize
(
grad
->
numel
(
));
Eigen
::
DSizes
<
int
,
1
>
grad_dsize
(
static_cast
<
int
>
(
grad
->
numel
()
));
ms_out
.
device
(
place
)
=
rho
*
ms
+
(
1
-
rho
)
*
g
*
g
;
if
(
centered
)
{
auto
mg
=
EigenVector
<
T
>::
Flatten
(
*
ctx
.
Input
<
Tensor
>
(
"MeanGrad"
));
auto
*
mean_grad_out
=
ctx
.
Output
<
Tensor
>
(
"MeanGradOut"
);
mean_grad_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
mg_out
=
EigenVector
<
T
>::
Flatten
(
*
mean_grad_out
);
mg_out
.
device
(
place
)
=
rho
*
mg
+
(
1
-
rho
)
*
g
;
mom_out
.
device
(
place
)
=
momentum
*
mom
+
lr
.
broadcast
(
grad_dsize
)
*
g
/
(
ms_out
-
mg_out
.
square
()
+
epsilon
).
sqrt
();
}
else
{
mom_out
.
device
(
place
)
=
momentum
*
mom
+
lr
.
broadcast
(
grad_dsize
)
*
g
/
(
ms_out
+
epsilon
).
sqrt
();
}
p_out
.
device
(
place
)
=
p
-
mom_out
;
}
};
...
...
python/paddle/fluid/layers/metric_op.py
浏览文件 @
3eb55f06
...
...
@@ -78,7 +78,7 @@ def accuracy(input, label, k=1, correct=None, total=None):
return
acc_out
def
auc
(
input
,
label
,
curve
=
'ROC'
,
num_thresholds
=
2
00
,
topk
=
1
):
def
auc
(
input
,
label
,
curve
=
'ROC'
,
num_thresholds
=
2
**
12
-
1
,
topk
=
1
):
"""
**Area Under the Curve (AUC) Layer**
...
...
@@ -118,16 +118,14 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1):
"""
helper
=
LayerHelper
(
"auc"
,
**
locals
())
auc_out
=
helper
.
create_tmp_variable
(
dtype
=
"float64"
)
batch_auc_out
=
helper
.
create_tmp_variable
(
dtype
=
"float64"
)
# make tp, tn, fp, fn persistable, so that can accumulate all batches.
tp
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
num_thresholds
])
tn
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
num_thresholds
])
fp
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
num_thresholds
])
fn
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
num_thresholds
])
for
var
in
[
tp
,
tn
,
fp
,
fn
]:
stat_pos
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
num_thresholds
+
1
])
stat_neg
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
num_thresholds
+
1
])
for
var
in
[
stat_pos
,
stat_neg
]:
helper
.
set_variable_initializer
(
var
,
Constant
(
value
=
0.0
,
force_cpu
=
True
))
...
...
@@ -137,18 +135,15 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1):
inputs
=
{
"Predict"
:
[
input
],
"Label"
:
[
label
],
"TP"
:
[
tp
],
"TN"
:
[
tn
],
"FP"
:
[
fp
],
"FN"
:
[
fn
]
"StatPos"
:
[
stat_pos
],
"StatNeg"
:
[
stat_neg
]
},
attrs
=
{
"curve"
:
curve
,
"num_thresholds"
:
num_thresholds
},
outputs
=
{
"AUC"
:
[
auc_out
],
"TPOut"
:
[
tp
],
"TNOut"
:
[
tn
],
"FPOut"
:
[
fp
],
"FNOut"
:
[
fn
]
"BatchAUC"
:
[
batch_auc_out
],
"StatPosOut"
:
[
stat_pos
],
"StatNegOut"
:
[
stat_neg
]
})
return
auc_out
,
[
tp
,
tn
,
fp
,
fn
]
return
auc_out
,
batch_auc_out
,
[
stat_pos
,
stat_neg
]
python/paddle/fluid/metrics.py
浏览文件 @
3eb55f06
...
...
@@ -558,8 +558,6 @@ class Auc(MetricBase):
name: metric name
curve: Specifies the name of the curve to be computed, 'ROC' [default] or
'PR' for the Precision-Recall-curve.
num_thresholds: The number of thresholds to use when discretizing the roc
curve.
"NOTE: only implement the ROC curve type via Python now."
...
...
@@ -574,15 +572,14 @@ class Auc(MetricBase):
numpy_auc = metric.eval()
"""
def
__init__
(
self
,
name
,
curve
=
'ROC'
,
num_thresholds
=
200
):
def
__init__
(
self
,
name
,
curve
=
'ROC'
,
num_thresholds
=
4095
):
super
(
Auc
,
self
).
__init__
(
name
=
name
)
self
.
_curve
=
curve
self
.
_num_thresholds
=
num_thresholds
self
.
_epsilon
=
1e-6
self
.
tp_list
=
np
.
zeros
((
num_thresholds
,
))
self
.
fn_list
=
np
.
zeros
((
num_thresholds
,
))
self
.
tn_list
=
np
.
zeros
((
num_thresholds
,
))
self
.
fp_list
=
np
.
zeros
((
num_thresholds
,
))
_num_pred_buckets
=
num_thresholds
+
1
self
.
_stat_pos
=
[
0
]
*
_num_pred_buckets
self
.
_stat_neg
=
[
0
]
*
_num_pred_buckets
def
update
(
self
,
preds
,
labels
):
if
not
_is_numpy_
(
labels
):
...
...
@@ -590,41 +587,32 @@ class Auc(MetricBase):
if
not
_is_numpy_
(
preds
):
raise
ValueError
(
"The 'predictions' must be a numpy ndarray."
)
kepsilon
=
1e-7
# to account for floating point imprecisions
thresholds
=
[(
i
+
1
)
*
1.0
/
(
self
.
_num_thresholds
-
1
)
for
i
in
range
(
self
.
_num_thresholds
-
2
)]
thresholds
=
[
0.0
-
kepsilon
]
+
thresholds
+
[
1.0
+
kepsilon
]
# calculate TP, FN, TN, FP count
for
idx_thresh
,
thresh
in
enumerate
(
thresholds
):
tp
,
fn
,
tn
,
fp
=
0
,
0
,
0
,
0
for
i
,
lbl
in
enumerate
(
labels
):
value
=
preds
[
i
,
1
]
bin_idx
=
int
(
value
*
self
.
_num_thresholds
)
assert
bin_idx
<=
self
.
_num_thresholds
if
lbl
:
if
preds
[
i
,
1
]
>=
thresh
:
tp
+=
1
else
:
fn
+=
1
self
.
_stat_pos
[
bin_idx
]
+=
1.0
else
:
if
preds
[
i
,
1
]
>=
thresh
:
fp
+=
1
else
:
tn
+=
1
self
.
tp_list
[
idx_thresh
]
+=
tp
self
.
fn_list
[
idx_thresh
]
+=
fn
self
.
tn_list
[
idx_thresh
]
+=
tn
self
.
fp_list
[
idx_thresh
]
+=
fp
self
.
_stat_neg
[
bin_idx
]
+=
1.0
@
staticmethod
def
trapezoid_area
(
x1
,
x2
,
y1
,
y2
):
return
abs
(
x1
-
x2
)
*
(
y1
+
y2
)
/
2.0
def
eval
(
self
):
epsilon
=
self
.
_epsilon
num_thresholds
=
self
.
_num_thresholds
tpr
=
(
self
.
tp_list
.
astype
(
"float32"
)
+
epsilon
)
/
(
self
.
tp_list
+
self
.
fn_list
+
epsilon
)
fpr
=
self
.
fp_list
.
astype
(
"float32"
)
/
(
self
.
fp_list
+
self
.
tn_list
+
epsilon
)
rec
=
(
self
.
tp_list
.
astype
(
"float32"
)
+
epsilon
)
/
(
self
.
tp_list
+
self
.
fp_list
+
epsilon
)
x
=
fpr
[:
num_thresholds
-
1
]
-
fpr
[
1
:]
y
=
(
tpr
[:
num_thresholds
-
1
]
+
tpr
[
1
:])
/
2.0
auc_value
=
np
.
sum
(
x
*
y
)
return
auc_value
tot_pos
=
0.0
tot_neg
=
0.0
auc
=
0.0
idx
=
self
.
_num_thresholds
while
idx
>=
0
:
tot_pos_prev
=
tot_pos
tot_neg_prev
=
tot_neg
tot_pos
+=
self
.
_stat_pos
[
idx
]
tot_neg
+=
self
.
_stat_neg
[
idx
]
auc
+=
self
.
trapezoid_area
(
tot_neg
,
tot_neg_prev
,
tot_pos
,
tot_pos_prev
)
idx
-=
1
return
auc
/
tot_pos
/
tot_neg
if
tot_pos
>
0.0
and
tot_neg
>
0.0
else
0.0
python/paddle/fluid/optimizer.py
浏览文件 @
3eb55f06
...
...
@@ -897,7 +897,20 @@ class RMSPropOptimizer(Optimizer):
r(w, t) & =
\\
rho r(w, t-1) + (1 -
\\
rho)(
\\
nabla Q_{i}(w))^2
v(w, t) & =
\\
beta v(w, t-1) +
\\
frac{
\\
eta} {
\\
sqrt{v(w,t) +
v(w, t) & =
\\
beta v(w, t-1) +
\\
frac{
\\
eta} {
\\
sqrt{r(w,t) +
\\
epsilon}}
\\
nabla Q_{i}(w)
w & = w - v(w, t)
if centered is True:
.. math::
r(w, t) & =
\\
rho r(w, t-1) + (1 -
\\
rho)(
\\
nabla Q_{i}(w))^2
g(w, t) & =
\\
rho g(w, t-1) + (1 -
\\
rho)
\\
nabla Q_{i}(w)
v(w, t) & =
\\
beta v(w, t-1) +
\\
frac{
\\
eta} {
\\
sqrt{r(w,t) - (g(w, t))^2 +
\\
epsilon}}
\\
nabla Q_{i}(w)
w & = w - v(w, t)
...
...
@@ -915,6 +928,10 @@ class RMSPropOptimizer(Optimizer):
avoid division by zero, set 1e-6 by default.
momentum(float): :math:`
\\
beta` in equation is the momentum term,
set 0.0 by default.
centered(bool): If True, gradients are normalized by the estimated variance of
the gradient; if False, by the uncentered second moment. Setting this to
True may help with training, but is slightly more expensive in terms of
computation and memory. Defaults to False.
Raises:
ValueError: If learning_rate, rho, epsilon, momentum are None.
...
...
@@ -928,12 +945,14 @@ class RMSPropOptimizer(Optimizer):
_momentum_acc_str
=
"momentum"
_mean_square_acc_str
=
"mean_square"
_mean_grad_acc_str
=
"mean_grad"
def
__init__
(
self
,
learning_rate
,
rho
=
0.95
,
epsilon
=
1.0e-6
,
momentum
=
0.0
,
centered
=
False
,
**
kwargs
):
super
(
RMSPropOptimizer
,
self
).
__init__
(
learning_rate
=
learning_rate
,
**
kwargs
)
...
...
@@ -950,6 +969,7 @@ class RMSPropOptimizer(Optimizer):
self
.
_rho
=
rho
self
.
_epsilon
=
epsilon
self
.
_momentum
=
momentum
self
.
_centered
=
centered
def
_create_accumulators
(
self
,
block
,
parameters
):
if
not
isinstance
(
block
,
framework
.
Block
):
...
...
@@ -958,6 +978,7 @@ class RMSPropOptimizer(Optimizer):
for
p
in
parameters
:
self
.
_add_accumulator
(
self
.
_momentum_acc_str
,
p
)
self
.
_add_accumulator
(
self
.
_mean_square_acc_str
,
p
)
self
.
_add_accumulator
(
self
.
_mean_grad_acc_str
,
p
)
def
_append_optimize_op
(
self
,
block
,
param_and_grad
):
if
not
isinstance
(
block
,
framework
.
Block
):
...
...
@@ -967,6 +988,8 @@ class RMSPropOptimizer(Optimizer):
param_and_grad
[
0
])
mean_square_acc
=
self
.
_get_accumulator
(
self
.
_mean_square_acc_str
,
param_and_grad
[
0
])
mean_grad_acc
=
self
.
_get_accumulator
(
self
.
_mean_grad_acc_str
,
param_and_grad
[
0
])
rmsprop_op
=
block
.
append_op
(
type
=
self
.
type
,
inputs
=
{
...
...
@@ -974,17 +997,20 @@ class RMSPropOptimizer(Optimizer):
"Grad"
:
param_and_grad
[
1
],
"Moment"
:
momentum_acc
,
"MeanSquare"
:
mean_square_acc
,
"MeanGrad"
:
mean_grad_acc
,
"LearningRate"
:
self
.
_create_param_lr
(
param_and_grad
),
},
outputs
=
{
"ParamOut"
:
param_and_grad
[
0
],
"MomentOut"
:
momentum_acc
,
"MeanSquareOut"
:
mean_square_acc
"MeanSquareOut"
:
mean_square_acc
,
"MeanGradOut"
:
mean_grad_acc
},
attrs
=
{
"epsilon"
:
self
.
_epsilon
,
"decay"
:
self
.
_rho
,
"momentum"
:
self
.
_momentum
"momentum"
:
self
.
_momentum
,
"centered"
:
self
.
_centered
})
return
rmsprop_op
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
3eb55f06
...
...
@@ -291,7 +291,7 @@ class OpTest(unittest.TestCase):
return_numpy
=
False
)
return
outs
,
fetch_list
def
check_output_with_place
(
self
,
place
,
atol
):
def
check_output_with_place
(
self
,
place
,
atol
,
equal_nan
=
False
):
outs
,
fetch_list
=
self
.
_calc_output
(
place
)
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
self
.
op_type
):
if
out_name
not
in
self
.
outputs
:
...
...
@@ -321,7 +321,7 @@ class OpTest(unittest.TestCase):
if
isinstance
(
expect
,
tuple
)
else
expect
self
.
assertTrue
(
np
.
allclose
(
actual_t
,
expect_t
,
atol
=
atol
),
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
"Output ("
+
sub_out_name
+
") has diff at "
+
str
(
place
))
if
isinstance
(
expect
,
tuple
):
...
...
@@ -337,7 +337,7 @@ class OpTest(unittest.TestCase):
expect_t
=
expect
[
0
]
if
isinstance
(
expect
,
tuple
)
else
expect
self
.
assertTrue
(
np
.
allclose
(
actual_t
,
expect_t
,
atol
=
atol
),
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"
\n
Expect "
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
str
(
actual_t
))
...
...
@@ -360,10 +360,10 @@ class OpTest(unittest.TestCase):
places
.
append
(
core
.
CUDAPlace
(
0
))
return
places
def
check_output
(
self
,
atol
=
1e-5
):
def
check_output
(
self
,
atol
=
1e-5
,
equal_nan
=
False
):
places
=
self
.
_get_places
()
for
place
in
places
:
self
.
check_output_with_place
(
place
,
atol
)
self
.
check_output_with_place
(
place
,
atol
,
equal_nan
)
def
check_output_customized
(
self
,
checker
):
places
=
self
.
_get_places
()
...
...
python/paddle/fluid/tests/unittests/test_auc_op.py
浏览文件 @
3eb55f06
...
...
@@ -26,18 +26,15 @@ class TestAucOp(OpTest):
pred
=
np
.
random
.
random
((
128
,
2
)).
astype
(
"float32"
)
labels
=
np
.
random
.
randint
(
0
,
2
,
(
128
,
1
))
num_thresholds
=
200
tp
=
np
.
zeros
((
num_thresholds
,
)).
astype
(
"int64"
)
tn
=
np
.
zeros
((
num_thresholds
,
)).
astype
(
"int64"
)
fp
=
np
.
zeros
((
num_thresholds
,
)).
astype
(
"int64"
)
fn
=
np
.
zeros
((
num_thresholds
,
)).
astype
(
"int64"
)
stat_pos
=
np
.
zeros
((
num_thresholds
+
1
,
)).
astype
(
"int64"
)
stat_neg
=
np
.
zeros
((
num_thresholds
+
1
,
)).
astype
(
"int64"
)
self
.
inputs
=
{
'Predict'
:
pred
,
'Label'
:
labels
,
'TP'
:
tp
,
'TN'
:
tn
,
'FP'
:
fp
,
'FN'
:
fn
"StatPos"
:
stat_pos
,
"StatNeg"
:
stat_neg
}
self
.
attrs
=
{
'curve'
:
'ROC'
,
'num_thresholds'
:
num_thresholds
}
...
...
@@ -47,11 +44,10 @@ class TestAucOp(OpTest):
python_auc
.
update
(
pred
,
labels
)
self
.
outputs
=
{
'AUC'
:
python_auc
.
eval
(),
'TPOut'
:
python_auc
.
tp_list
,
'FNOut'
:
python_auc
.
fn_list
,
'TNOut'
:
python_auc
.
tn_list
,
'FPOut'
:
python_auc
.
fp_list
'AUC'
:
np
.
array
(
python_auc
.
eval
()),
'BatchAUC'
:
np
.
array
(
python_auc
.
eval
()),
'StatPosOut'
:
np
.
array
(
python_auc
.
_stat_pos
),
'StatNegOut'
:
np
.
array
(
python_auc
.
_stat_neg
)
}
def
test_check_output
(
self
):
...
...
python/paddle/fluid/tests/unittests/test_rmsprop_op.py
浏览文件 @
3eb55f06
...
...
@@ -15,90 +15,164 @@
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
class
TestRmspropOp1
(
OpTest
):
''' Test RMSProp with explicit inputs
'''
def
setUp
(
self
):
self
.
op_type
=
"rmsprop"
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
mean_square
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
learning_rate
=
np
.
array
([
0.01
]).
astype
(
"float32"
)
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
moment
=
np
.
zeros
((
123
,
321
)).
astype
(
"float32"
)
epsilon
=
1e-6
decay
=
0.9
momentum
=
0.0
self
.
inputs
=
{
'Param'
:
param
,
'MeanSquare'
:
mean_square
,
'LearningRate'
:
learning_rate
,
'Grad'
:
grad
,
'Moment'
:
moment
,
}
self
.
attrs
=
{
'epsilon'
:
epsilon
,
'decay'
:
decay
,
'momentum'
:
momentum
}
ms_out
=
decay
*
mean_square
+
(
1
-
decay
)
*
grad
*
grad
moment_out
=
momentum
*
moment
+
\
learning_rate
*
grad
/
np
.
sqrt
(
ms_out
+
epsilon
)
param_out
=
param
-
moment_out
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'MomentOut'
:
moment_out
,
'MeanSquareOut'
:
ms_out
}
def
test_check_output
(
self
):
self
.
check_output
()
class
TestRmspropOp2
(
OpTest
):
'''Test RMSProp with default values for attributes
'''
def
setUp
(
self
):
self
.
op_type
=
"rmsprop"
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
mean_square
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
learning_rate
=
np
.
array
([
0.01
]).
astype
(
"float32"
)
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
moment
=
np
.
zeros
((
123
,
321
)).
astype
(
"float32"
)
epsilon
=
1.0e-10
decay
=
0.9
momentum
=
0.0
self
.
inputs
=
{
'Param'
:
param
,
'MeanSquare'
:
mean_square
,
'LearningRate'
:
learning_rate
,
'Grad'
:
grad
,
'Moment'
:
moment
,
}
ms_out
=
decay
*
mean_square
+
(
1
-
decay
)
*
grad
*
grad
moment_out
=
momentum
*
moment
+
\
learning_rate
*
grad
/
np
.
sqrt
(
ms_out
+
epsilon
)
param_out
=
param
-
moment_out
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'MomentOut'
:
moment_out
,
'MeanSquareOut'
:
ms_out
}
def
test_check_output
(
self
):
self
.
check_output
()
import
paddle.fluid.core
as
core
from
paddle.fluid.op
import
Operator
class
TestBase
(
unittest
.
TestCase
):
def
setup
(
self
,
centered
,
epsilon
=
1e-6
):
np
.
random
.
seed
(
5
)
# fix seed
self
.
param_name
=
"param"
self
.
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
self
.
mean_square_name
=
"mean_square"
self
.
mean_square
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
self
.
mean_grad_name
=
"mean_grad"
self
.
mean_grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
self
.
lr_name
=
"lr"
self
.
learning_rate
=
np
.
array
([
0.01
]).
astype
(
"float32"
)
self
.
grad_name
=
"grad"
self
.
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
self
.
moment_name
=
"moment"
self
.
moment
=
np
.
zeros
((
123
,
321
)).
astype
(
"float32"
)
self
.
epsilon
=
epsilon
self
.
decay
=
0.9
self
.
momentum
=
0.0
self
.
centered
=
centered
self
.
ms_out
=
self
.
decay
*
self
.
mean_square
+
(
1
-
self
.
decay
)
*
self
.
grad
*
self
.
grad
if
centered
:
self
.
mg_out
=
self
.
decay
*
self
.
mean_grad
+
(
1
-
self
.
decay
)
*
self
.
grad
self
.
moment_out
=
self
.
momentum
*
self
.
moment
+
\
self
.
learning_rate
*
self
.
grad
/
np
.
sqrt
(
self
.
ms_out
-
np
.
square
(
self
.
mg_out
)
+
self
.
epsilon
)
else
:
self
.
moment_out
=
self
.
momentum
*
self
.
moment
+
\
self
.
learning_rate
*
self
.
grad
/
np
.
sqrt
(
self
.
ms_out
+
self
.
epsilon
)
self
.
param_out
=
self
.
param
-
self
.
moment_out
def
check
(
self
,
actual_t
,
expect_t
,
place
,
out_name
,
atol
=
1e-5
,
equal_nan
=
False
):
self
.
assertTrue
(
np
.
allclose
(
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"
\n
Expect "
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
str
(
actual_t
))
class
TestRmspropOp
(
TestBase
):
def
check_with_place
(
self
,
place
,
centered
,
epsilon
):
self
.
setup
(
centered
,
epsilon
)
scope
=
core
.
Scope
()
# create and initialize Param Variable
param
=
scope
.
var
(
self
.
param_name
).
get_tensor
()
param
.
set
(
self
.
param
,
place
)
mean_square
=
scope
.
var
(
self
.
mean_square_name
).
get_tensor
()
mean_square
.
set
(
self
.
mean_square
,
place
)
lr
=
scope
.
var
(
self
.
lr_name
).
get_tensor
()
lr
.
set
(
self
.
learning_rate
,
place
)
grad
=
scope
.
var
(
self
.
grad_name
).
get_tensor
()
grad
.
set
(
self
.
grad
,
place
)
moment
=
scope
.
var
(
self
.
moment_name
).
get_tensor
()
moment
.
set
(
self
.
moment
,
place
)
# create and run sgd operator
if
self
.
centered
:
mean_grad
=
scope
.
var
(
self
.
mean_grad_name
).
get_tensor
()
mean_grad
.
set
(
self
.
mean_grad
,
place
)
rmsprop_op
=
Operator
(
"rmsprop"
,
Param
=
self
.
param_name
,
Grad
=
self
.
grad_name
,
MeanSquare
=
self
.
mean_square_name
,
MeanGrad
=
self
.
mean_grad_name
,
Moment
=
self
.
moment_name
,
LearningRate
=
self
.
lr_name
,
ParamOut
=
self
.
param_name
,
MeanSquareOut
=
self
.
mean_square_name
,
MomentOut
=
self
.
moment_name
,
MeanGradOut
=
self
.
mean_grad_name
,
epsilon
=
self
.
epsilon
,
decay
=
self
.
decay
,
momentum
=
self
.
momentum
,
centered
=
True
)
else
:
rmsprop_op
=
Operator
(
"rmsprop"
,
Param
=
self
.
param_name
,
Grad
=
self
.
grad_name
,
MeanSquare
=
self
.
mean_square_name
,
Moment
=
self
.
moment_name
,
LearningRate
=
self
.
lr_name
,
ParamOut
=
self
.
param_name
,
MeanSquareOut
=
self
.
mean_square_name
,
MomentOut
=
self
.
moment_name
,
epsilon
=
self
.
epsilon
,
decay
=
self
.
decay
,
momentum
=
self
.
momentum
,
centered
=
False
)
rmsprop_op
.
run
(
scope
,
place
)
atol
=
1e-5
equal_nan
=
False
if
self
.
centered
:
atol
=
1e-3
equal_nan
=
True
self
.
check
(
np
.
array
(
mean_square
),
self
.
ms_out
,
place
,
self
.
mean_square_name
)
self
.
check
(
np
.
array
(
moment
),
self
.
moment_out
,
place
,
self
.
moment_name
,
atol
=
atol
,
equal_nan
=
equal_nan
)
self
.
check
(
np
.
array
(
param
),
self
.
param_out
,
place
,
self
.
param_name
,
atol
=
atol
,
equal_nan
=
equal_nan
)
if
self
.
centered
:
self
.
check
(
np
.
array
(
mean_grad
),
self
.
mg_out
,
place
,
self
.
mean_grad_name
)
def
test_rmsprop
(
self
):
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
core
.
CUDAPlace
(
0
))
for
place
in
places
:
self
.
check_with_place
(
place
,
False
,
1e-6
)
self
.
check_with_place
(
place
,
False
,
1e-10
)
self
.
check_with_place
(
place
,
True
,
1e-6
)
self
.
check_with_place
(
place
,
True
,
1e-10
)
if
__name__
==
"__main__"
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录