diff --git a/doc/fluid/new_docs/user_guides/howto/inference/native_infer.rst b/doc/fluid/new_docs/user_guides/howto/inference/native_infer.rst index 21a6fe5cf54d0c0c760ade4ba602024ffa29675f..6d6f3035c0b5c985cd39d45df9f1bcce50dcefa0 100644 --- a/doc/fluid/new_docs/user_guides/howto/inference/native_infer.rst +++ b/doc/fluid/new_docs/user_guides/howto/inference/native_infer.rst @@ -4,13 +4,12 @@ Paddle 预测 API 为了更简单方便的预测部署,Fluid 提供了一套高层 API 用来隐藏底层不同的优化实现。 -`预测库相关代码 `__ +`预测库相关代码 `_ 包括 - 头文件 ``paddle_inference_api.h`` 定义了所有的接口 - 库文件\ ``libpaddle_fluid.so`` 或 ``libpaddle_fluid.a`` -- 库文件 ``libpaddle_inference_api.so`` 或 - ``libpaddle_inference_api.a`` + 编译和依赖可以参考 :ref:`install_or_build_cpp_inference_lib` 。 @@ -97,8 +96,7 @@ engine CHECK(predictor->Run(slots, &outputs)); // 获取 outputs ... -编译时,联编 ``libpaddle_fluid.a/.so`` 和 -``libpaddle_inference_api.a/.so`` 便可。 +编译时,联编 ``libpaddle_fluid.a/.so`` 便可。 详细代码参考 ------------ diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index bb5f2894c08b5d8941ad8914f6b83280aa053e37..70e5b97770a6c581c6a9c0145b03c42b83f14471 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -312,7 +312,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kw paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.polygon_box_transform ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)) -paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC', 200, 1)) +paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC', 4095, 1)) paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.layers.natural_exp_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.layers.inverse_time_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)) @@ -376,7 +376,7 @@ paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ ArgSpec(args=['self', 'l paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.optimizer.FtrlOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power'], varargs=None, keywords='kwargs', defaults=(0.0, 0.0, -0.5)) paddle.fluid.optimizer.FtrlOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum'], varargs=None, keywords='kwargs', defaults=(0.95, 1e-06, 0.0)) +paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered'], varargs=None, keywords='kwargs', defaults=(0.95, 1e-06, 0.0, False)) paddle.fluid.optimizer.RMSPropOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.optimizer.AdadeltaOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho'], varargs=None, keywords='kwargs', defaults=(1e-06, 0.95)) paddle.fluid.optimizer.AdadeltaOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.cc b/paddle/fluid/framework/details/multi_devices_graph_pass.cc index 0bfff745493d069e948e6d277ec2bbfb0673a70b..7a99169849debcbc57d6f197b36c5045b211f3ef 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc @@ -326,7 +326,7 @@ std::unique_ptr MultiDevSSAGraphBuilder::ApplyImpl( ir::Graph &result = *graph; for (auto &node : nodes) { - if (node->NodeType() == ir::Node::Type::kVariable && node->Var()) { + if (node->IsVar() && node->Var()) { all_vars_.emplace(node->Name(), node->Var()); } } @@ -583,18 +583,6 @@ void MultiDevSSAGraphBuilder::InsertDataBalanceOp( } } -bool MultiDevSSAGraphBuilder::IsParameterGradientOnce( - const std::string &og, - std::unordered_set *og_has_been_broadcast) const { - bool is_pg_once = - grad_names_.count(og) != 0 && og_has_been_broadcast->count(og) == 0; - if (is_pg_once) { - // Insert NCCL AllReduce Op - og_has_been_broadcast->insert(og); - } - return is_pg_once; -} - int MultiDevSSAGraphBuilder::GetOpDeviceID(const ir::Graph &graph, ir::Node *node) const { if (strategy_.reduce_ != BuildStrategy::ReduceStrategy::kReduce) { @@ -688,20 +676,6 @@ VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(ir::Graph *result, return var; } -// Find the first occurence of `prev_op_name` and make current `op` depend -// on it. -void MultiDevSSAGraphBuilder::ConnectOp(ir::Graph *result, OpHandleBase *op, - const std::string &prev_op_name) const { - for (auto &prev_op : result->Get(kGraphOps)) { - if (prev_op->Name() == prev_op_name) { - auto *dep_var = new DummyVarHandle(result->CreateControlDepVar()); - prev_op->AddOutput(dep_var); - result->Get(kGraphDepVars).emplace(dep_var); - op->AddInput(dep_var); - } - } -} - void MultiDevSSAGraphBuilder::CreateDistTrainOp(ir::Graph *result, ir::Node *node) const { int op_dev_id = -1; diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.h b/paddle/fluid/framework/details/multi_devices_graph_pass.h index 7a6f238f9cf7af18cb10ea271e453fec1902c833..ac6d9c5a64cfde60f75c76dae0a30cc7d735e996 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_pass.h +++ b/paddle/fluid/framework/details/multi_devices_graph_pass.h @@ -69,9 +69,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass { std::vector FindDistTrainRecvVars( const std::vector &nodes) const; - void ConnectOp(ir::Graph *result, OpHandleBase *op, - const std::string &prev_op_name) const; - void CreateComputationalOps(ir::Graph *result, ir::Node *node, size_t num_places) const; @@ -83,10 +80,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass { void CreateComputationalOp(ir::Graph *result, ir::Node *node, int dev_id) const; - bool IsParameterGradientOnce( - const std::string &og, - std::unordered_set *og_has_been_broadcast) const; - int GetOpDeviceID(const ir::Graph &graph, ir::Node *node) const; void InsertAllReduceOp(ir::Graph *result, const std::string &og) const; diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index 00f5e7fad2ef5d42eb0de9703389e910090d93c1..9512fd056e73836cdc34a9e409ab2d7809a6aff6 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -87,15 +87,24 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, } op_desc.SetInput("Bias", {new_bias_var}); } - #undef GET_NODE + // Create temp variables. + scope->Var(name_scope + "/BatchedInput.new") + ->GetMutable(); + scope->Var(name_scope + "/BatchCellPreAct.new") + ->GetMutable(); + scope->Var(name_scope + "/BatchedGate.new") + ->GetMutable(); + op_desc.SetInput("H0", {}); op_desc.SetInput("C0", {}); op_desc.SetOutput("Hidden", {hidden_n->Name()}); op_desc.SetOutput("Cell", {cell_n->Name()}); op_desc.SetOutput("XX", {xx_n->Name()}); - op_desc.SetOutput("BatchedInput", {"blstm_0.tmp_2"}); + op_desc.SetOutput("BatchedGate", {name_scope + "/BatchedGate.new"}); + op_desc.SetOutput("BatchCellPreAct", {name_scope + "/BatchCellPreAct.new"}); + op_desc.SetOutput("BatchedInput", {name_scope + "/BatchedInput.new"}); op_desc.SetAttr("is_reverse", lstm_n->Op()->GetAttr("is_reverse")); op_desc.SetAttr("use_peepholes", lstm_n->Op()->GetAttr("use_peepholes")); // TODO(TJ): get from attr @@ -131,8 +140,8 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, int fusion_count{0}; - auto fc_no_bias_handler = [&]( - const GraphPatternDetector::subgraph_t& subgraph, Graph* g) { + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { #define GET_NODE(name__) \ std::string name__##key = name_scope + "/" + #name__; \ auto* name__##n = pattern->RetrieveNode(name__##key); \ @@ -153,21 +162,24 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, if (with_fc_bias) { GET_NODE(fc_bias); + GET_NODE(elementwise_add); lstm_creator(lstm, x, w, Weight, Bias, Hidden, Cell, fc_out, fc_bias); + // Remove unneeded nodes. + std::unordered_set marked_nodes( + {mul_n, lstm_n, elementwise_add_n}); + GraphSafeRemoveNodes(graph, marked_nodes); } else { lstm_creator(lstm, x, w, Weight, Bias, Hidden, Cell, fc_out, -1); + // Remove unneeded nodes. + std::unordered_set marked_nodes({mul_n, lstm_n}); + GraphSafeRemoveNodes(graph, marked_nodes); } #undef GET_NODE - // Remove unneeded nodes. - std::unordered_set marked_nodes({mul_n, lstm_n}); - - GraphSafeRemoveNodes(graph, marked_nodes); - ++fusion_count; }; - gpd(graph, fc_no_bias_handler); + gpd(graph, handler); return fusion_count; } diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h index 5a6687872eb3ab4a032227fda9ff0e7f5254670b..3ee32c63a46fcc34bdccd1e14d4bbaf9668c49e9 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#pragma once + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index a4da69a0a2ea44806b68a27647213759ebd387b1..434bee4ccee1c199088d09c934fe86435ec7d095 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -73,7 +73,6 @@ void PDPattern::AddEdge(PDNode* a, PDNode* b) { void GraphPatternDetector::operator()(Graph* graph, GraphPatternDetector::handle_t handler) { if (!MarkPDNodesInGraph(*graph)) { - LOG(INFO) << "Mark failed"; return; } diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 9d67c4a6997dfe19561f37bf3ea76eba8b59ff35..eacea1750f6f1e86a8fe79637c3bd757a7275398 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -19,6 +19,9 @@ #endif #include +#include +#include +#include #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/node.h" #include "paddle/fluid/inference/analysis/dot.h" diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt index dadc8a53706fb9edff884dcf6d49168bfef3aa30..f2e18a461fd221252e4a10262a13bc8e942f5988 100644 --- a/paddle/fluid/inference/analysis/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/CMakeLists.txt @@ -58,7 +58,7 @@ endif() inference_analysis_test(test_analyzer SRCS analyzer_tester.cc EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor ARGS --infer_ditu_rnn_model=${DITU_INSTALL_DIR}/model - --infer_ditu_rnn_data=${DITU_INSTALL_DIR}/data.txt) + --infer_ditu_rnn_data=${DITU_INSTALL_DIR}/data.txt) inference_analysis_test(test_data_flow_graph SRCS data_flow_graph_tester.cc) inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc) @@ -74,7 +74,7 @@ inference_analysis_test(test_model_store_pass SRCS model_store_pass_tester.cc) set(CHINESE_NER_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/chinese_ner_model.tar.gz") set(CHINESE_NER_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/chinese_ner-data.txt.tar.gz") set(CHINESE_NER_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/chinese_ner" CACHE PATH "Chinese ner model and data root." FORCE) -if (NOT EXISTS ${CHINESE_NER_INSTALL_DIR} AND WITH_TESTING) +if (NOT EXISTS ${CHINESE_NER_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE) inference_download_and_uncompress(${CHINESE_NER_INSTALL_DIR} ${CHINESE_NER_MODEL_URL} "chinese_ner_model.tar.gz") inference_download_and_uncompress(${CHINESE_NER_INSTALL_DIR} ${CHINESE_NER_DATA_URL} "chinese_ner-data.txt.tar.gz") endif() @@ -87,7 +87,7 @@ inference_analysis_test(test_analyzer_ner SRCS analyzer_ner_tester.cc set(LAC_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/lac_model.tar.gz") set(LAC_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/lac_data.txt.tar.gz") set(LAC_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/lac" CACHE PATH "LAC model and data root." FORCE) -if (NOT EXISTS ${LAC_INSTALL_DIR} AND WITH_TESTING) +if (NOT EXISTS ${LAC_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE) inference_download_and_uncompress(${LAC_INSTALL_DIR} ${LAC_MODEL_URL} "lac_model.tar.gz") inference_download_and_uncompress(${LAC_INSTALL_DIR} ${LAC_DATA_URL} "lac_data.txt.tar.gz") endif() @@ -96,3 +96,15 @@ inference_analysis_test(test_analyzer_lac SRCS analyzer_lac_tester.cc EXTRA_DEPS paddle_inference_api paddle_fluid_api ARGS --infer_model=${LAC_INSTALL_DIR}/model --infer_data=${LAC_INSTALL_DIR}/data.txt) + + +set(TEXT_CLASSIFICATION_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/text-classification-Senta.tar.gz") +set(TEXT_CLASSIFICATION_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/text_classification" CACHE PATH "Text Classification model and data root." FORCE) + +if (NOT EXISTS ${TEXT_CLASSIFICATION_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE) + inference_download_and_uncompress(${TEXT_CLASSIFICATION_INSTALL_DIR} ${TEXT_CLASSIFICATION_MODEL_URL} "text-classification-Senta.tar.gz") +endif() + +inference_analysis_test(test_text_classification SRCS test_text_classification.cc + EXTRA_DEPS paddle_inference_api paddle_fluid_api analysis_predictor + ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/text-classification-Senta) diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc index 192ac2daa6a78efec6db19870f71e80593c62da9..ca834406451d53fd44887300561a6327d97cafcd 100644 --- a/paddle/fluid/inference/analysis/analyzer.cc +++ b/paddle/fluid/inference/analysis/analyzer.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/inference/analysis/analyzer.h" #include +#include #include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" #include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" @@ -41,20 +42,16 @@ class DfgPassManagerImpl final : public DfgPassManager { public: DfgPassManagerImpl() { // TODO(Superjomn) set the key with pass reprs. - LOG(INFO) - << "-----------------------------------------------------------------"; - if (FLAGS_IA_enable_ir) { - AddPass("fluid-to-ir-pass", new FluidToIrPass); - } else { + if (!FLAGS_IA_enable_ir) { AddPass("fluid-to-data-flow-graph", new FluidToDataFlowGraphPass); + } else { + AddPass("fluid-to-ir-pass", new FluidToIrPass); } TryAddTensorRtPass(); AddPass("data-flow-graph-to-fluid", new DataFlowGraphToFluidPass); if (!FLAGS_IA_output_storage_path.empty()) { AddPass("model-store-pass", new ModelStorePass); } - LOG(INFO) - << "-----------------------------------------------------------------"; } std::string repr() const override { return "dfg-pass-manager"; } @@ -101,19 +98,16 @@ class DfgPassManagerImpl final : public DfgPassManager { Analyzer::Analyzer() { Register("manager1", new DfgPassManagerImpl); } void Analyzer::Run(Argument* argument) { + std::vector passes; + for (auto& pass : all_ir_passes_) { + if (!disabled_ir_passes_.count(pass)) { + passes.push_back(pass); + passes.push_back("graph_viz_pass"); // add graphviz for debug. + } + } + passes.push_back("graph_viz_pass"); // Ugly support fluid-to-ir-pass - argument->Set(kFluidToIrPassesAttr, - new std::vector({ - // Manual update the passes here. - "graph_viz_pass", // - "infer_clean_graph_pass", "graph_viz_pass", // - "attention_lstm_fuse_pass", "graph_viz_pass", // - "fc_lstm_fuse_pass", "graph_viz_pass", // - "mul_lstm_fuse_pass", "graph_viz_pass", // - "seq_concat_fc_fuse_pass", "graph_viz_pass", // - "fc_fuse_pass", "graph_viz_pass" // - - })); + argument->Set(kFluidToIrPassesAttr, new std::vector(passes)); for (auto& x : data_) { PADDLE_ENFORCE(x->Initialize(argument)); @@ -122,6 +116,11 @@ void Analyzer::Run(Argument* argument) { } } +Analyzer& Analyzer::DisableIrPasses(const std::vector& passes) { + disabled_ir_passes_.insert(passes.begin(), passes.end()); + return *this; +} + } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/analysis/analyzer.h b/paddle/fluid/inference/analysis/analyzer.h index 2e107c82dd50d5cf22797f4c82e69d302514f955..3fdd2b9ec7537c891a04efb3ca9a1d45075ffa5e 100644 --- a/paddle/fluid/inference/analysis/analyzer.h +++ b/paddle/fluid/inference/analysis/analyzer.h @@ -36,16 +36,10 @@ limitations under the License. */ */ #include +#include "paddle/fluid/inference/analysis/flags.h" #include "paddle/fluid/inference/analysis/pass.h" #include "paddle/fluid/inference/analysis/pass_manager.h" -// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this -// flag if not available. -DECLARE_bool(IA_enable_tensorrt_subgraph_engine); -DECLARE_string(IA_graphviz_log_root); -DECLARE_string(IA_output_storage_path); -DECLARE_bool(IA_enable_ir); - namespace paddle { namespace inference { namespace analysis { @@ -57,7 +51,26 @@ class Analyzer : public OrderedRegistry { void Run(Argument* argument); + Analyzer& DisableIrPasses(const std::vector& passes); + DISABLE_COPY_AND_ASSIGN(Analyzer); + + private: + // All avaiable IR passes. + // The bigger fuse comes first, so that the small operators prefer to be + // merged in a larger fuse op. The small fusion will not break the pattern of + // larger fusion. + const std::vector all_ir_passes_{{ + // Manual update the passes here. + "infer_clean_graph_pass", // + "attention_lstm_fuse_pass", // + "fc_lstm_fuse_pass", // + "mul_lstm_fuse_pass", // + "seq_concat_fc_fuse_pass", // + "fc_fuse_pass", // + }}; + + std::unordered_set disabled_ir_passes_; }; } // namespace analysis diff --git a/paddle/fluid/inference/analysis/analyzer_tester.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc index 59e103e1179240a100d492a2475573c8188bebe7..94be6733f63488634ee1302e015a57c9a8892d84 100644 --- a/paddle/fluid/inference/analysis/analyzer_tester.cc +++ b/paddle/fluid/inference/analysis/analyzer_tester.cc @@ -271,17 +271,22 @@ void TestDituRNNPrediction(const std::string &model_path, const std::string &data_path, int batch_size, bool use_analysis, bool activate_ir, int num_times = 1) { - NativeConfig config; + AnalysisConfig config; config.prog_file = FLAGS_infer_ditu_rnn_model + "/__model__"; config.param_file = FLAGS_infer_ditu_rnn_model + "/param"; config.use_gpu = false; config.device = 0; config.specify_input_name = true; + config.enable_ir_optim = activate_ir; + PADDLE_ENFORCE(config.ir_mode == + AnalysisConfig::IrPassMode::kExclude); // default + config.ir_passes.clear(); // Do not exclude any pass. auto base_predictor = CreatePaddlePredictor(config); auto predictor = - CreatePaddlePredictor(config); + CreatePaddlePredictor( + config); std::vector input_slots; DataRecord data(data_path, batch_size); // Prepare inputs. diff --git a/paddle/fluid/inference/analysis/flags.h b/paddle/fluid/inference/analysis/flags.h new file mode 100644 index 0000000000000000000000000000000000000000..717e543f01dfa071865a5c14c0b7679e65239daf --- /dev/null +++ b/paddle/fluid/inference/analysis/flags.h @@ -0,0 +1,22 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this +// flag if not available. +DECLARE_bool(IA_enable_tensorrt_subgraph_engine); +DECLARE_string(IA_graphviz_log_root); +DECLARE_string(IA_output_storage_path); +DECLARE_bool(IA_enable_ir); diff --git a/paddle/fluid/inference/analysis/fluid_to_ir_pass.h b/paddle/fluid/inference/analysis/fluid_to_ir_pass.h index 6731b1f759363eec5dd8645783212a72ace67b2f..3086085710d6e850ed27e82d2323690dfdd3ef19 100644 --- a/paddle/fluid/inference/analysis/fluid_to_ir_pass.h +++ b/paddle/fluid/inference/analysis/fluid_to_ir_pass.h @@ -15,6 +15,7 @@ #pragma once #include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/inference/analysis/flags.h" #include "paddle/fluid/inference/analysis/ir_pass_manager.h" #include "paddle/fluid/inference/analysis/pass.h" @@ -85,9 +86,11 @@ class FluidToIrPass final : public DataFlowGraphPass { new Scope *(&argument_->Get(ir::kParamScopeAttr))); } - const auto &ir_passes_to_apply = - argument_->Get>(kFluidToIrPassesAttr); - ir_passes.Apply(ir_passes_to_apply); + if (FLAGS_IA_enable_ir) { + const auto &ir_passes_to_apply = + argument_->Get>(kFluidToIrPassesAttr); + ir_passes.Apply(ir_passes_to_apply); + } PADDLE_ENFORCE(argument_->main_dfg.get()); argument_->main_dfg->Build(ir_passes.graph()); diff --git a/paddle/fluid/inference/analysis/test_text_classification.cc b/paddle/fluid/inference/analysis/test_text_classification.cc new file mode 100644 index 0000000000000000000000000000000000000000..2913824f62301795aea967c22021b2af11f343c1 --- /dev/null +++ b/paddle/fluid/inference/analysis/test_text_classification.cc @@ -0,0 +1,109 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files. +#include +#include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/inference/analysis/analyzer.h" +#include "paddle/fluid/inference/analysis/ut_helper.h" +#include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/fluid/inference/api/timer.h" + +DEFINE_string(infer_model, "", "Directory of the inference model."); +DEFINE_string(infer_data, "", "Path of the dataset."); +DEFINE_int32(batch_size, 1, "batch size."); +DEFINE_int32(repeat, 1, "How many times to repeat run."); + +namespace paddle { + +template +std::string to_string(const std::vector &vec) { + std::stringstream ss; + for (const auto &c : vec) { + ss << c << " "; + } + return ss.str(); +} + +void PrintTime(const double latency, const int bs, const int repeat) { + LOG(INFO) << "===========profile result==========="; + LOG(INFO) << "batch_size: " << bs << ", repeat: " << repeat + << ", avg latency: " << latency / repeat << "ms"; + LOG(INFO) << "====================================="; +} + +void Main(int batch_size) { + // Three sequence inputs. + std::vector input_slots(1); + // one batch starts + // data -- + int64_t data0[] = {0, 1, 2}; + for (auto &input : input_slots) { + input.data.Reset(data0, sizeof(data0)); + input.shape = std::vector({3, 1}); + // dtype -- + input.dtype = PaddleDType::INT64; + // LoD -- + input.lod = std::vector>({{0, 3}}); + } + + // shape -- + // Create Predictor -- + AnalysisConfig config; + config.model_dir = FLAGS_infer_model; + config.use_gpu = false; + config.enable_ir_optim = true; + config.ir_passes.push_back("fc_lstm_fuse_pass"); + auto predictor = + CreatePaddlePredictor( + config); + + inference::Timer timer; + double sum = 0; + std::vector output_slots; + for (int i = 0; i < FLAGS_repeat; i++) { + timer.tic(); + CHECK(predictor->Run(input_slots, &output_slots)); + sum += timer.toc(); + } + PrintTime(sum, batch_size, FLAGS_repeat); + + // Get output + LOG(INFO) << "get outputs " << output_slots.size(); + + for (auto &output : output_slots) { + LOG(INFO) << "output.shape: " << to_string(output.shape); + // no lod ? + CHECK_EQ(output.lod.size(), 0UL); + LOG(INFO) << "output.dtype: " << output.dtype; + std::stringstream ss; + for (int i = 0; i < 5; i++) { + ss << static_cast(output.data.data())[i] << " "; + } + LOG(INFO) << "output.data summary: " << ss.str(); + // one batch ends + } +} + +TEST(text_classification, basic) { Main(FLAGS_batch_size); } + +} // namespace paddle + +USE_PASS(fc_fuse_pass); +USE_PASS(seq_concat_fc_fuse_pass); +USE_PASS(fc_lstm_fuse_pass); +USE_PASS(graph_viz_pass); +USE_PASS(infer_clean_graph_pass); +USE_PASS(attention_lstm_fuse_pass); diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index 3a43c72e33b3d5d8910b554021bb1c6a626edd93..ea00bf364951b0a4304b380df492d00e84451136 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -44,7 +44,19 @@ function(inference_api_test TARGET_NAME) endfunction(inference_api_test) cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor) -cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api) +cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api + analysis + ir_pass_manager + pass + fc_fuse_pass + fc_lstm_fuse_pass + seq_concat_fc_fuse_pass + graph_viz_pass + infer_clean_graph_pass + graph_pattern_detector + infer_clean_graph_pass + attention_lstm_fuse_pass + ) cc_test(test_paddle_inference_api SRCS api_tester.cc diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index e87abd2feeff7769eb223f83a7a28f5cb3337cdb..a8fa677202d8429c274a6e3fdfd18ef5d48620c2 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -14,6 +14,8 @@ #include "paddle/fluid/inference/api/analysis_predictor.h" #include +#include +#include #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/scope.h" @@ -28,6 +30,8 @@ bool AnalysisPredictor::Init( VLOG(3) << "Predictor::init()"; if (config_.use_gpu) { place_ = paddle::platform::CUDAPlace(config_.device); + LOG(WARNING) << "ir optimize only supports CPU currently"; + config_.enable_ir_optim = false; } else { place_ = paddle::platform::CPUPlace(); } @@ -73,7 +77,7 @@ bool AnalysisPredictor::Init( void AnalysisPredictor::OptimizeInferenceProgram() { LOG(INFO) << "optimize begin"; - FLAGS_IA_enable_ir = true; + FLAGS_IA_enable_ir = config_.enable_ir_optim; FLAGS_IA_enable_tensorrt_subgraph_engine = false; FLAGS_IA_output_storage_path = ""; // Don't output the model. // Analyze inference_program @@ -90,24 +94,26 @@ void AnalysisPredictor::OptimizeInferenceProgram() { } argument_.origin_program_desc.reset( new ProgramDesc(*inference_program_->Proto())); - Analyzer().Run(&argument_); + PADDLE_ENFORCE(config_.ir_mode == AnalysisConfig::IrPassMode::kExclude, + "Only kExclude is supported yet."); + Analyzer().DisableIrPasses(config_.ir_passes).Run(&argument_); + CHECK(argument_.transformed_program_desc); VLOG(5) << "to prepare executor"; - // LOG(INFO) << "transformed_parogram_desc " << - // argument.transformed_program_desc->DebugString(); inference_program_.reset( new framework::ProgramDesc(*argument_.transformed_program_desc)); - PADDLE_ENFORCE(argument_.Has(framework::ir::kParamScopeAttr)); - // Update scope. - scope_.reset( - argument_.Release(framework::ir::kParamScopeAttr)); - LOG(INFO) << "optimize end =="; + if (argument_.Has(framework::ir::kParamScopeAttr)) { + // Update scope. + scope_.reset( + argument_.Release(framework::ir::kParamScopeAttr)); + } + LOG(INFO) << "== optimize end =="; } template <> std::unique_ptr CreatePaddlePredictor< - NativeConfig, PaddleEngineKind::kAnalysis>(const NativeConfig& config) { - VLOG(3) << "create NativePredictor"; + AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig& config) { + VLOG(3) << "create AnalysisConfig"; if (config.use_gpu) { // 1. GPU memeroy PADDLE_ENFORCE_GT( diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index e32b6185f6044ab3577bde0a8f8dcf2391688aa8..e53925366e9214cd60422efe56884751297c15e5 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include +#include #include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/api/api_impl.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" @@ -28,7 +30,7 @@ using framework::proto::ProgramDesc; */ class AnalysisPredictor : public NativePaddlePredictor { public: - explicit AnalysisPredictor(const NativeConfig& config) + explicit AnalysisPredictor(const AnalysisConfig& config) : NativePaddlePredictor(config), config_(config) {} bool Init(const std::shared_ptr& parent_scope); @@ -44,7 +46,7 @@ class AnalysisPredictor : public NativePaddlePredictor { Argument& analysis_argument() { return argument_; } private: - NativeConfig config_; + AnalysisConfig config_; Argument argument_; }; diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index 38b11d9113e4b03f8365b969009f7a385a683a70..bd9b4b1a814f995e3979105f5b9830b95fd8ea7d 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -176,7 +176,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector &inputs, framework::Scope *scope) { VLOG(3) << "Predictor::set_feed"; if (inputs.size() != feeds_.size()) { - LOG(ERROR) << "wrong feed input size."; + LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get " + << inputs.size(); return false; } for (size_t i = 0; i < inputs.size(); ++i) { diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index 1baa64c249f291ec1bc874be5031abe6d4368274..995da11e4a30eca72a91a53d3293aa8b033b012b 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -150,6 +150,21 @@ struct TensorRTConfig : public NativeConfig { int workspace_size{1 << 30}; }; +// NOTE WIP, not stable yet. +struct AnalysisConfig : public NativeConfig { + // + enum class IrPassMode { + kSystem, // Use system default passes, not customize. + kInclude, // Specify the passes in `ir_passes`. + kExclude // Specify the disabled passes in `ir_passes`. + }; + + bool enable_ir_optim = true; + IrPassMode ir_mode{IrPassMode::kExclude}; + // attention lstm fuse works only on some specific models, disable as default. + std::vector ir_passes{"attention_lstm_fuse_pass"}; +}; + // A factory to help create different predictors. // // FOR EXTENSION DEVELOPER: diff --git a/paddle/fluid/operators/auc_op.cc b/paddle/fluid/operators/auc_op.cc index 5edecd18e673da326ec119cf9a383f24f8045089..dfaa7456f917c1308984b361afed752f96ea6f59 100644 --- a/paddle/fluid/operators/auc_op.cc +++ b/paddle/fluid/operators/auc_op.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/auc_op.h" -#include namespace paddle { namespace operators { @@ -36,15 +35,12 @@ class AucOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(predict_height, label_height, "Out and Label should have same height."); - int num_thres = ctx->Attrs().Get("num_thresholds"); + int num_pred_buckets = ctx->Attrs().Get("num_thresholds") + 1; ctx->SetOutputDim("AUC", {1}); - ctx->SetOutputDim("TPOut", {num_thres}); - ctx->SetOutputDim("TNOut", {num_thres}); - ctx->SetOutputDim("FPOut", {num_thres}); - ctx->SetOutputDim("FNOut", {num_thres}); - - ctx->ShareLoD("Predict", /*->*/ "AUC"); + ctx->SetOutputDim("BatchAUC", {1}); + ctx->SetOutputDim("StatPosOut", {num_pred_buckets}); + ctx->SetOutputDim("StatNegOut", {num_pred_buckets}); } protected: @@ -66,25 +62,24 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Label", "A 2D int tensor indicating the label of the training data. " "shape: [batch_size, 1]"); - AddInput("TP", "True-Positive value."); - AddInput("FP", "False-Positive value."); - AddInput("TN", "True-Negative value."); - AddInput("FN", "False-Negative value."); // TODO(typhoonzero): support weight input + AddInput("StatPos", "Statistic value when label = 1"); + AddInput("StatNeg", "Statistic value when label = 0"); + AddOutput("AUC", "A scalar representing the " "current area-under-the-curve."); - AddOutput("TPOut", "True-Positive value."); - AddOutput("FPOut", "False-Positive value."); - AddOutput("TNOut", "True-Negative value."); - AddOutput("FNOut", "False-Negative value."); + AddOutput("BatchAUC", "The AUC for current batch"); + AddOutput("StatPosOut", "Statistic value when label = 1"); + AddOutput("StatNegOut", "Statistic value when label = 0"); AddAttr("curve", "Curve type, can be 'ROC' or 'PR'.") .SetDefault("ROC"); + AddAttr("num_thresholds", "The number of thresholds to use when discretizing the" " roc curve.") - .SetDefault(200); + .SetDefault((2 << 12) - 1); AddComment(R"DOC( Area Under The Curve (AUC) Operator. diff --git a/paddle/fluid/operators/auc_op.h b/paddle/fluid/operators/auc_op.h index 0a18585edb54a76aff5ae72ecc71e0eebb9f9361..fb0517d70635e090f8c5b59ff9d8420fc34c747b 100644 --- a/paddle/fluid/operators/auc_op.h +++ b/paddle/fluid/operators/auc_op.h @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once + #include #include -#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -23,106 +23,85 @@ namespace operators { using Tensor = framework::Tensor; -template -using EigenVector = framework::EigenVector; - template class AucKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* predict = ctx.Input("Predict"); - auto* label = ctx.Input("Label"); - auto* auc = ctx.Output("AUC"); + void Compute(const framework::ExecutionContext &ctx) const override { + auto *predict = ctx.Input("Predict"); + auto *label = ctx.Input("Label"); + + std::string curve = ctx.Attr("curve"); + int num_thresholds = ctx.Attr("num_thresholds"); + int num_pred_buckets = num_thresholds + 1; + // Only use output var for now, make sure it's persistable and // not cleaned up for each batch. - auto* true_positive = ctx.Output("TPOut"); - auto* false_positive = ctx.Output("FPOut"); - auto* true_negative = ctx.Output("TNOut"); - auto* false_negative = ctx.Output("FNOut"); + auto *auc = ctx.Output("AUC"); + auto *stat_pos = ctx.Output("StatPosOut"); + auto *stat_neg = ctx.Output("StatNegOut"); - auto* auc_data = auc->mutable_data(ctx.GetPlace()); + auto *stat_pos_data = stat_pos->mutable_data(ctx.GetPlace()); + auto *stat_neg_data = stat_neg->mutable_data(ctx.GetPlace()); + calcAuc(ctx, label, predict, stat_pos_data, stat_neg_data, num_thresholds, + auc); - std::string curve = ctx.Attr("curve"); - int num_thresholds = ctx.Attr("num_thresholds"); - std::vector thresholds_list; - thresholds_list.reserve(num_thresholds); - for (int i = 1; i < num_thresholds - 1; i++) { - thresholds_list[i] = static_cast(i) / (num_thresholds - 1); - } - const double kEpsilon = 1e-7; - thresholds_list[0] = 0.0f - kEpsilon; - thresholds_list[num_thresholds - 1] = 1.0f + kEpsilon; + auto *batch_auc = ctx.Output("BatchAUC"); + std::vector stat_pos_batch(num_pred_buckets, 0); + std::vector stat_neg_batch(num_pred_buckets, 0); + calcAuc(ctx, label, predict, stat_pos_batch.data(), stat_neg_batch.data(), + num_thresholds, batch_auc); + } + private: + inline static double trapezoidArea(double X1, double X2, double Y1, + double Y2) { + return (X1 > X2 ? (X1 - X2) : (X2 - X1)) * (Y1 + Y2) / 2.0; + } + + inline static void calcAuc(const framework::ExecutionContext &ctx, + const framework::Tensor *label, + const framework::Tensor *predict, + int64_t *stat_pos, int64_t *stat_neg, + int num_thresholds, + framework::Tensor *auc_tensor) { size_t batch_size = predict->dims()[0]; size_t inference_width = predict->dims()[1]; + const T *inference_data = predict->data(); + const auto *label_data = label->data(); + + auto *auc = auc_tensor->mutable_data(ctx.GetPlace()); - const T* inference_data = predict->data(); - const auto* label_data = label->data(); - - auto* tp_data = true_positive->mutable_data(ctx.GetPlace()); - auto* fn_data = false_negative->mutable_data(ctx.GetPlace()); - auto* tn_data = true_negative->mutable_data(ctx.GetPlace()); - auto* fp_data = false_positive->mutable_data(ctx.GetPlace()); - - for (int idx_thresh = 0; idx_thresh < num_thresholds; idx_thresh++) { - // calculate TP, FN, TN, FP for current thresh - int64_t tp = 0, fn = 0, tn = 0, fp = 0; - for (size_t i = 0; i < batch_size; i++) { - // NOTE: label_data used as bool, labels > 0 will be treated as true. - if (label_data[i]) { - if (inference_data[i * inference_width + 1] >= - (thresholds_list[idx_thresh])) { - tp++; - } else { - fn++; - } - } else { - if (inference_data[i * inference_width + 1] >= - (thresholds_list[idx_thresh])) { - fp++; - } else { - tn++; - } - } + for (size_t i = 0; i < batch_size; i++) { + uint32_t binIdx = static_cast( + inference_data[i * inference_width + 1] * num_thresholds); + if (label_data[i]) { + stat_pos[binIdx] += 1.0; + } else { + stat_neg[binIdx] += 1.0; } - // store rates - tp_data[idx_thresh] += tp; - fn_data[idx_thresh] += fn; - tn_data[idx_thresh] += tn; - fp_data[idx_thresh] += fp; } - // epsilon to avoid divide by zero. - double epsilon = 1e-6; - // Riemann sum to caculate auc. - Tensor tp_rate, fp_rate, rec_rate; - tp_rate.Resize({num_thresholds}); - fp_rate.Resize({num_thresholds}); - rec_rate.Resize({num_thresholds}); - auto* tp_rate_data = tp_rate.mutable_data(ctx.GetPlace()); - auto* fp_rate_data = fp_rate.mutable_data(ctx.GetPlace()); - auto* rec_rate_data = rec_rate.mutable_data(ctx.GetPlace()); - for (int i = 0; i < num_thresholds; i++) { - tp_rate_data[i] = (static_cast(tp_data[i]) + epsilon) / - (tp_data[i] + fn_data[i] + epsilon); - fp_rate_data[i] = - static_cast(fp_data[i]) / (fp_data[i] + tn_data[i] + epsilon); - rec_rate_data[i] = (static_cast(tp_data[i]) + epsilon) / - (tp_data[i] + fp_data[i] + epsilon); + + *auc = 0.0f; + + double totPos = 0.0; + double totNeg = 0.0; + double totPosPrev = 0.0; + double totNegPrev = 0.0; + + int idx = num_thresholds; + + while (idx >= 0) { + totPosPrev = totPos; + totNegPrev = totNeg; + totPos += stat_pos[idx]; + totNeg += stat_neg[idx]; + *auc += trapezoidArea(totNeg, totNegPrev, totPos, totPosPrev); + + --idx; } - *auc_data = 0.0f; - if (curve == "ROC") { - for (int i = 0; i < num_thresholds - 1; i++) { - auto dx = fp_rate_data[i] - fp_rate_data[i + 1]; - auto y = (tp_rate_data[i] + tp_rate_data[i + 1]) / 2.0f; - *auc_data = *auc_data + dx * y; - } - } else if (curve == "PR") { - for (int i = 1; i < num_thresholds; i++) { - auto dx = tp_rate_data[i] - tp_rate_data[i - 1]; - auto y = (rec_rate_data[i] + rec_rate_data[i - 1]) / 2.0f; - *auc_data = *auc_data + dx * y; - } + + if (totPos > 0.0 && totNeg > 0.0) { + *auc = *auc / totPos / totNeg; } } }; diff --git a/paddle/fluid/operators/lookup_table_op.h b/paddle/fluid/operators/lookup_table_op.h index f5c10ced8305b64c6386c5051804f8c9a8f71802..58463dc4d6fd7cc3454de766814a947fee161070 100644 --- a/paddle/fluid/operators/lookup_table_op.h +++ b/paddle/fluid/operators/lookup_table_op.h @@ -57,7 +57,7 @@ class LookupTableKernel : public framework::OpKernel { memset(output + i * row_width, 0, row_width * sizeof(T)); } else { PADDLE_ENFORCE_LT(ids[i], row_number); - PADDLE_ENFORCE_GE(ids[i], 0); + PADDLE_ENFORCE_GE(ids[i], 0, "ids %d", i); memcpy(output + i * row_width, table + ids[i] * row_width, row_width * sizeof(T)); } diff --git a/paddle/fluid/operators/rmsprop_op.cc b/paddle/fluid/operators/rmsprop_op.cc index 919ebe48ca38040274bd2052b95ef96eccff4db6..2f773f222e50a440801b06a4fd997bf237b34772 100644 --- a/paddle/fluid/operators/rmsprop_op.cc +++ b/paddle/fluid/operators/rmsprop_op.cc @@ -36,9 +36,13 @@ class RmspropOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), "Output(param_out) of RmspropOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("MomentOut"), - "Output(Momentum_out) of RmspropOp should not be null."); + "Output(MomentOut) of RmspropOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("MeanSquareOut"), "Output(MeanSquareOut) of RmspropOp should not be null."); + if (ctx->Attrs().Get("centered")) { + PADDLE_ENFORCE(ctx->HasOutput("MeanGradOut"), + "Output(MeanGradOut) of RmspropOp should not be null."); + } auto param_dim = ctx->GetInputDim("Param"); PADDLE_ENFORCE_EQ( @@ -58,6 +62,9 @@ class RmspropOp : public framework::OperatorWithKernel { ctx->SetOutputDim("ParamOut", param_dim); ctx->SetOutputDim("MomentOut", param_dim); ctx->SetOutputDim("MeanSquareOut", param_dim); + if (ctx->Attrs().Get("centered")) { + ctx->SetOutputDim("MeanGradOut", param_dim); + } } }; @@ -70,6 +77,10 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("MeanSquare", "(Tensor, default Tensor)" " The mean square value that gets updated."); + AddInput("MeanGrad", + "(Tensor, default Tensor)" + " The moving average of gradient") + .AsDispensable(); AddInput("LearningRate", "(Tensor, default Tensor) " "The learning rate should be a tensor of size 1."); @@ -82,6 +93,8 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("ParamOut", "(Tensor) Output updated parameter value."); AddOutput("MomentOut", "(Tensor) Output updated moment."); AddOutput("MeanSquareOut", "(Tensor) Output Mean squared updated value."); + AddOutput("MeanGradOut", + "(Tensor) Output moving average of gradient updated value."); AddAttr("epsilon", "(float, default 1e-10) Constant " @@ -93,6 +106,8 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault(0.9f); AddAttr("momentum", "(float, default 0.0) Constant value.") .SetDefault(0.0f); + AddAttr("centered", "(bool, default false) use centered rmsprop.") + .SetDefault(false); AddComment(R"DOC( Rmsprop Optimizer. @@ -103,6 +118,14 @@ MomentOut = momentum * Moment + ParamOut = Param - MomentOut $$ +if centered is true: + +mean_grad = decay * mean_square{t-1} + (1-decay) * gradient +mean_square = decay * mean_square{t-1} + (1-decay) * gradient ** 2 +mom = momentum * mom{t-1} + learning_rate * g_t / + sqrt(mean_square - mean_grad**2 + epsilon) +param -= mom + The original slides that proposed Rmsprop: Slide 29 of http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) diff --git a/paddle/fluid/operators/rmsprop_op.h b/paddle/fluid/operators/rmsprop_op.h index 12836f43bde47ac87eb0af33dea501593b659a5d..25ed32c5ebb2ff5be962ac1e3e38c970623d705c 100644 --- a/paddle/fluid/operators/rmsprop_op.h +++ b/paddle/fluid/operators/rmsprop_op.h @@ -41,6 +41,7 @@ class RmspropOpKernel : public framework::OpKernel { float epsilon = ctx.Attr("epsilon"); float rho = ctx.Attr("decay"); float momentum = ctx.Attr("momentum"); + bool centered = ctx.Attr("centered"); auto p = EigenVector::Flatten(*ctx.Input("Param")); auto ms = EigenVector::Flatten(*ctx.Input("MeanSquare")); @@ -53,12 +54,24 @@ class RmspropOpKernel : public framework::OpKernel { auto ms_out = EigenVector::Flatten(*mean_square_out); auto& place = *ctx.template device_context().eigen_device(); - Eigen::DSizes grad_dsize(grad->numel()); + Eigen::DSizes grad_dsize(static_cast(grad->numel())); ms_out.device(place) = rho * ms + (1 - rho) * g * g; - mom_out.device(place) = - momentum * mom + - lr.broadcast(grad_dsize) * g / (ms_out + epsilon).sqrt(); + if (centered) { + auto mg = EigenVector::Flatten(*ctx.Input("MeanGrad")); + auto* mean_grad_out = ctx.Output("MeanGradOut"); + mean_grad_out->mutable_data(ctx.GetPlace()); + auto mg_out = EigenVector::Flatten(*mean_grad_out); + + mg_out.device(place) = rho * mg + (1 - rho) * g; + mom_out.device(place) = momentum * mom + + lr.broadcast(grad_dsize) * g / + (ms_out - mg_out.square() + epsilon).sqrt(); + } else { + mom_out.device(place) = + momentum * mom + + lr.broadcast(grad_dsize) * g / (ms_out + epsilon).sqrt(); + } p_out.device(place) = p - mom_out; } }; diff --git a/python/paddle/fluid/layers/metric_op.py b/python/paddle/fluid/layers/metric_op.py index 0182bbeb637ec7b6a341a4822a1cc5fb5aef077d..b1598bfec210474ae1e17f9f88e8b57aa80b8452 100644 --- a/python/paddle/fluid/layers/metric_op.py +++ b/python/paddle/fluid/layers/metric_op.py @@ -78,7 +78,7 @@ def accuracy(input, label, k=1, correct=None, total=None): return acc_out -def auc(input, label, curve='ROC', num_thresholds=200, topk=1): +def auc(input, label, curve='ROC', num_thresholds=2**12 - 1, topk=1): """ **Area Under the Curve (AUC) Layer** @@ -118,16 +118,14 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1): """ helper = LayerHelper("auc", **locals()) auc_out = helper.create_tmp_variable(dtype="float64") + batch_auc_out = helper.create_tmp_variable(dtype="float64") # make tp, tn, fp, fn persistable, so that can accumulate all batches. - tp = helper.create_global_variable( - persistable=True, dtype='int64', shape=[num_thresholds]) - tn = helper.create_global_variable( - persistable=True, dtype='int64', shape=[num_thresholds]) - fp = helper.create_global_variable( - persistable=True, dtype='int64', shape=[num_thresholds]) - fn = helper.create_global_variable( - persistable=True, dtype='int64', shape=[num_thresholds]) - for var in [tp, tn, fp, fn]: + stat_pos = helper.create_global_variable( + persistable=True, dtype='int64', shape=[num_thresholds + 1]) + stat_neg = helper.create_global_variable( + persistable=True, dtype='int64', shape=[num_thresholds + 1]) + + for var in [stat_pos, stat_neg]: helper.set_variable_initializer( var, Constant( value=0.0, force_cpu=True)) @@ -137,18 +135,15 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1): inputs={ "Predict": [input], "Label": [label], - "TP": [tp], - "TN": [tn], - "FP": [fp], - "FN": [fn] + "StatPos": [stat_pos], + "StatNeg": [stat_neg] }, attrs={"curve": curve, "num_thresholds": num_thresholds}, outputs={ "AUC": [auc_out], - "TPOut": [tp], - "TNOut": [tn], - "FPOut": [fp], - "FNOut": [fn] + "BatchAUC": [batch_auc_out], + "StatPosOut": [stat_pos], + "StatNegOut": [stat_neg] }) - return auc_out, [tp, tn, fp, fn] + return auc_out, batch_auc_out, [stat_pos, stat_neg] diff --git a/python/paddle/fluid/metrics.py b/python/paddle/fluid/metrics.py index 592cb23eb9319658f8542ed5bc6ab3e95cfdb118..0c2800dcf35ed156b71625babea2724f520575e5 100644 --- a/python/paddle/fluid/metrics.py +++ b/python/paddle/fluid/metrics.py @@ -558,8 +558,6 @@ class Auc(MetricBase): name: metric name curve: Specifies the name of the curve to be computed, 'ROC' [default] or 'PR' for the Precision-Recall-curve. - num_thresholds: The number of thresholds to use when discretizing the roc - curve. "NOTE: only implement the ROC curve type via Python now." @@ -574,15 +572,14 @@ class Auc(MetricBase): numpy_auc = metric.eval() """ - def __init__(self, name, curve='ROC', num_thresholds=200): + def __init__(self, name, curve='ROC', num_thresholds=4095): super(Auc, self).__init__(name=name) self._curve = curve self._num_thresholds = num_thresholds - self._epsilon = 1e-6 - self.tp_list = np.zeros((num_thresholds, )) - self.fn_list = np.zeros((num_thresholds, )) - self.tn_list = np.zeros((num_thresholds, )) - self.fp_list = np.zeros((num_thresholds, )) + + _num_pred_buckets = num_thresholds + 1 + self._stat_pos = [0] * _num_pred_buckets + self._stat_neg = [0] * _num_pred_buckets def update(self, preds, labels): if not _is_numpy_(labels): @@ -590,41 +587,32 @@ class Auc(MetricBase): if not _is_numpy_(preds): raise ValueError("The 'predictions' must be a numpy ndarray.") - kepsilon = 1e-7 # to account for floating point imprecisions - thresholds = [(i + 1) * 1.0 / (self._num_thresholds - 1) - for i in range(self._num_thresholds - 2)] - thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon] - - # calculate TP, FN, TN, FP count - for idx_thresh, thresh in enumerate(thresholds): - tp, fn, tn, fp = 0, 0, 0, 0 - for i, lbl in enumerate(labels): - if lbl: - if preds[i, 1] >= thresh: - tp += 1 - else: - fn += 1 - else: - if preds[i, 1] >= thresh: - fp += 1 - else: - tn += 1 - self.tp_list[idx_thresh] += tp - self.fn_list[idx_thresh] += fn - self.tn_list[idx_thresh] += tn - self.fp_list[idx_thresh] += fp + for i, lbl in enumerate(labels): + value = preds[i, 1] + bin_idx = int(value * self._num_thresholds) + assert bin_idx <= self._num_thresholds + if lbl: + self._stat_pos[bin_idx] += 1.0 + else: + self._stat_neg[bin_idx] += 1.0 + + @staticmethod + def trapezoid_area(x1, x2, y1, y2): + return abs(x1 - x2) * (y1 + y2) / 2.0 def eval(self): - epsilon = self._epsilon - num_thresholds = self._num_thresholds - tpr = (self.tp_list.astype("float32") + epsilon) / ( - self.tp_list + self.fn_list + epsilon) - fpr = self.fp_list.astype("float32") / ( - self.fp_list + self.tn_list + epsilon) - rec = (self.tp_list.astype("float32") + epsilon) / ( - self.tp_list + self.fp_list + epsilon) - - x = fpr[:num_thresholds - 1] - fpr[1:] - y = (tpr[:num_thresholds - 1] + tpr[1:]) / 2.0 - auc_value = np.sum(x * y) - return auc_value + tot_pos = 0.0 + tot_neg = 0.0 + auc = 0.0 + + idx = self._num_thresholds + while idx >= 0: + tot_pos_prev = tot_pos + tot_neg_prev = tot_neg + tot_pos += self._stat_pos[idx] + tot_neg += self._stat_neg[idx] + auc += self.trapezoid_area(tot_neg, tot_neg_prev, tot_pos, + tot_pos_prev) + idx -= 1 + + return auc / tot_pos / tot_neg if tot_pos > 0.0 and tot_neg > 0.0 else 0.0 diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 33d6311b9717c66f0d6782eb6b3e348cd4c02a69..215f0cf2fc5ab4fbd06719ac4790a01dd00080eb 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -897,7 +897,20 @@ class RMSPropOptimizer(Optimizer): r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 - v(w, t) & = \\beta v(w, t-1) + \\frac{\\eta} {\\sqrt{v(w,t) + + v(w, t) & = \\beta v(w, t-1) + \\frac{\\eta} {\\sqrt{r(w,t) + + \\epsilon}} \\nabla Q_{i}(w) + + w & = w - v(w, t) + + if centered is True: + + .. math:: + + r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 + + g(w, t) & = \\rho g(w, t-1) + (1 - \\rho)\\nabla Q_{i}(w) + + v(w, t) & = \\beta v(w, t-1) + \\frac{\\eta} {\\sqrt{r(w,t) - (g(w, t))^2 + \\epsilon}} \\nabla Q_{i}(w) w & = w - v(w, t) @@ -915,6 +928,10 @@ class RMSPropOptimizer(Optimizer): avoid division by zero, set 1e-6 by default. momentum(float): :math:`\\beta` in equation is the momentum term, set 0.0 by default. + centered(bool): If True, gradients are normalized by the estimated variance of + the gradient; if False, by the uncentered second moment. Setting this to + True may help with training, but is slightly more expensive in terms of + computation and memory. Defaults to False. Raises: ValueError: If learning_rate, rho, epsilon, momentum are None. @@ -928,12 +945,14 @@ class RMSPropOptimizer(Optimizer): _momentum_acc_str = "momentum" _mean_square_acc_str = "mean_square" + _mean_grad_acc_str = "mean_grad" def __init__(self, learning_rate, rho=0.95, epsilon=1.0e-6, momentum=0.0, + centered=False, **kwargs): super(RMSPropOptimizer, self).__init__( learning_rate=learning_rate, **kwargs) @@ -950,6 +969,7 @@ class RMSPropOptimizer(Optimizer): self._rho = rho self._epsilon = epsilon self._momentum = momentum + self._centered = centered def _create_accumulators(self, block, parameters): if not isinstance(block, framework.Block): @@ -958,6 +978,7 @@ class RMSPropOptimizer(Optimizer): for p in parameters: self._add_accumulator(self._momentum_acc_str, p) self._add_accumulator(self._mean_square_acc_str, p) + self._add_accumulator(self._mean_grad_acc_str, p) def _append_optimize_op(self, block, param_and_grad): if not isinstance(block, framework.Block): @@ -967,6 +988,8 @@ class RMSPropOptimizer(Optimizer): param_and_grad[0]) mean_square_acc = self._get_accumulator(self._mean_square_acc_str, param_and_grad[0]) + mean_grad_acc = self._get_accumulator(self._mean_grad_acc_str, + param_and_grad[0]) rmsprop_op = block.append_op( type=self.type, inputs={ @@ -974,17 +997,20 @@ class RMSPropOptimizer(Optimizer): "Grad": param_and_grad[1], "Moment": momentum_acc, "MeanSquare": mean_square_acc, + "MeanGrad": mean_grad_acc, "LearningRate": self._create_param_lr(param_and_grad), }, outputs={ "ParamOut": param_and_grad[0], "MomentOut": momentum_acc, - "MeanSquareOut": mean_square_acc + "MeanSquareOut": mean_square_acc, + "MeanGradOut": mean_grad_acc }, attrs={ "epsilon": self._epsilon, "decay": self._rho, - "momentum": self._momentum + "momentum": self._momentum, + "centered": self._centered }) return rmsprop_op diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index 20f1a37a426e9697048d636bf738c9056213e5f6..868df5d79424302700d9f82083cd7e4320c707ea 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -291,7 +291,7 @@ class OpTest(unittest.TestCase): return_numpy=False) return outs, fetch_list - def check_output_with_place(self, place, atol): + def check_output_with_place(self, place, atol, equal_nan=False): outs, fetch_list = self._calc_output(place) for out_name, out_dup in Operator.get_op_outputs(self.op_type): if out_name not in self.outputs: @@ -321,7 +321,7 @@ class OpTest(unittest.TestCase): if isinstance(expect, tuple) else expect self.assertTrue( np.allclose( - actual_t, expect_t, atol=atol), + actual_t, expect_t, atol=atol, equal_nan=equal_nan), "Output (" + sub_out_name + ") has diff at " + str(place)) if isinstance(expect, tuple): @@ -337,7 +337,7 @@ class OpTest(unittest.TestCase): expect_t = expect[0] if isinstance(expect, tuple) else expect self.assertTrue( np.allclose( - actual_t, expect_t, atol=atol), + actual_t, expect_t, atol=atol, equal_nan=equal_nan), "Output (" + out_name + ") has diff at " + str(place) + "\nExpect " + str(expect_t) + "\n" + "But Got" + str(actual_t)) @@ -360,10 +360,10 @@ class OpTest(unittest.TestCase): places.append(core.CUDAPlace(0)) return places - def check_output(self, atol=1e-5): + def check_output(self, atol=1e-5, equal_nan=False): places = self._get_places() for place in places: - self.check_output_with_place(place, atol) + self.check_output_with_place(place, atol, equal_nan) def check_output_customized(self, checker): places = self._get_places() diff --git a/python/paddle/fluid/tests/unittests/test_auc_op.py b/python/paddle/fluid/tests/unittests/test_auc_op.py index 5393a17e674a3cad6d705a1ff7a45320e644af94..1de4a9d016a177944253d12094722d3a05614be2 100644 --- a/python/paddle/fluid/tests/unittests/test_auc_op.py +++ b/python/paddle/fluid/tests/unittests/test_auc_op.py @@ -26,18 +26,15 @@ class TestAucOp(OpTest): pred = np.random.random((128, 2)).astype("float32") labels = np.random.randint(0, 2, (128, 1)) num_thresholds = 200 - tp = np.zeros((num_thresholds, )).astype("int64") - tn = np.zeros((num_thresholds, )).astype("int64") - fp = np.zeros((num_thresholds, )).astype("int64") - fn = np.zeros((num_thresholds, )).astype("int64") + + stat_pos = np.zeros((num_thresholds + 1, )).astype("int64") + stat_neg = np.zeros((num_thresholds + 1, )).astype("int64") self.inputs = { 'Predict': pred, 'Label': labels, - 'TP': tp, - 'TN': tn, - 'FP': fp, - 'FN': fn + "StatPos": stat_pos, + "StatNeg": stat_neg } self.attrs = {'curve': 'ROC', 'num_thresholds': num_thresholds} @@ -47,11 +44,10 @@ class TestAucOp(OpTest): python_auc.update(pred, labels) self.outputs = { - 'AUC': python_auc.eval(), - 'TPOut': python_auc.tp_list, - 'FNOut': python_auc.fn_list, - 'TNOut': python_auc.tn_list, - 'FPOut': python_auc.fp_list + 'AUC': np.array(python_auc.eval()), + 'BatchAUC': np.array(python_auc.eval()), + 'StatPosOut': np.array(python_auc._stat_pos), + 'StatNegOut': np.array(python_auc._stat_neg) } def test_check_output(self): diff --git a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py index 3d4623c74d9a307b12ab6d72ad0b4d2dae938720..70848e4e2239e2be160bb0c1a28a5aecd01a87dc 100644 --- a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py +++ b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py @@ -15,90 +15,164 @@ from __future__ import print_function import unittest + import numpy as np -from op_test import OpTest - - -class TestRmspropOp1(OpTest): - ''' Test RMSProp with explicit inputs - ''' - - def setUp(self): - self.op_type = "rmsprop" - - param = np.random.random((123, 321)).astype("float32") - mean_square = np.random.random((123, 321)).astype("float32") - learning_rate = np.array([0.01]).astype("float32") - grad = np.random.random((123, 321)).astype("float32") - moment = np.zeros((123, 321)).astype("float32") - - epsilon = 1e-6 - decay = 0.9 - momentum = 0.0 - - self.inputs = { - 'Param': param, - 'MeanSquare': mean_square, - 'LearningRate': learning_rate, - 'Grad': grad, - 'Moment': moment, - } - - self.attrs = {'epsilon': epsilon, 'decay': decay, 'momentum': momentum} - - ms_out = decay * mean_square + (1 - decay) * grad * grad - moment_out = momentum * moment + \ - learning_rate * grad / np.sqrt(ms_out + epsilon) - param_out = param - moment_out - - self.outputs = { - 'ParamOut': param_out, - 'MomentOut': moment_out, - 'MeanSquareOut': ms_out - } - - def test_check_output(self): - self.check_output() - - -class TestRmspropOp2(OpTest): - '''Test RMSProp with default values for attributes - ''' - - def setUp(self): - self.op_type = "rmsprop" - - param = np.random.random((123, 321)).astype("float32") - mean_square = np.random.random((123, 321)).astype("float32") - learning_rate = np.array([0.01]).astype("float32") - grad = np.random.random((123, 321)).astype("float32") - moment = np.zeros((123, 321)).astype("float32") - - epsilon = 1.0e-10 - decay = 0.9 - momentum = 0.0 - - self.inputs = { - 'Param': param, - 'MeanSquare': mean_square, - 'LearningRate': learning_rate, - 'Grad': grad, - 'Moment': moment, - } - - ms_out = decay * mean_square + (1 - decay) * grad * grad - moment_out = momentum * moment + \ - learning_rate * grad / np.sqrt(ms_out + epsilon) - param_out = param - moment_out - - self.outputs = { - 'ParamOut': param_out, - 'MomentOut': moment_out, - 'MeanSquareOut': ms_out - } - - def test_check_output(self): - self.check_output() +import paddle.fluid.core as core +from paddle.fluid.op import Operator + + +class TestBase(unittest.TestCase): + def setup(self, centered, epsilon=1e-6): + np.random.seed(5) # fix seed + + self.param_name = "param" + self.param = np.random.random((123, 321)).astype("float32") + + self.mean_square_name = "mean_square" + self.mean_square = np.random.random((123, 321)).astype("float32") + + self.mean_grad_name = "mean_grad" + self.mean_grad = np.random.random((123, 321)).astype("float32") + + self.lr_name = "lr" + self.learning_rate = np.array([0.01]).astype("float32") + + self.grad_name = "grad" + self.grad = np.random.random((123, 321)).astype("float32") + + self.moment_name = "moment" + self.moment = np.zeros((123, 321)).astype("float32") + + self.epsilon = epsilon + self.decay = 0.9 + self.momentum = 0.0 + self.centered = centered + + self.ms_out = self.decay * self.mean_square + (1 - self.decay + ) * self.grad * self.grad + if centered: + self.mg_out = self.decay * self.mean_grad + (1 - self.decay + ) * self.grad + self.moment_out = self.momentum * self.moment + \ + self.learning_rate * self.grad / np.sqrt(self.ms_out - np.square(self.mg_out) + self.epsilon) + else: + self.moment_out = self.momentum * self.moment + \ + self.learning_rate * self.grad / np.sqrt(self.ms_out + self.epsilon) + + self.param_out = self.param - self.moment_out + + def check(self, + actual_t, + expect_t, + place, + out_name, + atol=1e-5, + equal_nan=False): + self.assertTrue( + np.allclose( + actual_t, expect_t, atol=atol, equal_nan=equal_nan), + "Output (" + out_name + ") has diff at " + str(place) + "\nExpect " + + str(expect_t) + "\n" + "But Got" + str(actual_t)) + + +class TestRmspropOp(TestBase): + def check_with_place(self, place, centered, epsilon): + self.setup(centered, epsilon) + scope = core.Scope() + + # create and initialize Param Variable + param = scope.var(self.param_name).get_tensor() + param.set(self.param, place) + + mean_square = scope.var(self.mean_square_name).get_tensor() + mean_square.set(self.mean_square, place) + + lr = scope.var(self.lr_name).get_tensor() + lr.set(self.learning_rate, place) + + grad = scope.var(self.grad_name).get_tensor() + grad.set(self.grad, place) + + moment = scope.var(self.moment_name).get_tensor() + moment.set(self.moment, place) + + # create and run sgd operator + + if self.centered: + mean_grad = scope.var(self.mean_grad_name).get_tensor() + mean_grad.set(self.mean_grad, place) + + rmsprop_op = Operator( + "rmsprop", + Param=self.param_name, + Grad=self.grad_name, + MeanSquare=self.mean_square_name, + MeanGrad=self.mean_grad_name, + Moment=self.moment_name, + LearningRate=self.lr_name, + ParamOut=self.param_name, + MeanSquareOut=self.mean_square_name, + MomentOut=self.moment_name, + MeanGradOut=self.mean_grad_name, + epsilon=self.epsilon, + decay=self.decay, + momentum=self.momentum, + centered=True) + else: + rmsprop_op = Operator( + "rmsprop", + Param=self.param_name, + Grad=self.grad_name, + MeanSquare=self.mean_square_name, + Moment=self.moment_name, + LearningRate=self.lr_name, + ParamOut=self.param_name, + MeanSquareOut=self.mean_square_name, + MomentOut=self.moment_name, + epsilon=self.epsilon, + decay=self.decay, + momentum=self.momentum, + centered=False) + + rmsprop_op.run(scope, place) + + atol = 1e-5 + equal_nan = False + + if self.centered: + atol = 1e-3 + equal_nan = True + + self.check( + np.array(mean_square), self.ms_out, place, self.mean_square_name) + self.check( + np.array(moment), + self.moment_out, + place, + self.moment_name, + atol=atol, + equal_nan=equal_nan) + self.check( + np.array(param), + self.param_out, + place, + self.param_name, + atol=atol, + equal_nan=equal_nan) + + if self.centered: + self.check( + np.array(mean_grad), self.mg_out, place, self.mean_grad_name) + + def test_rmsprop(self): + places = [core.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(core.CUDAPlace(0)) + for place in places: + self.check_with_place(place, False, 1e-6) + self.check_with_place(place, False, 1e-10) + self.check_with_place(place, True, 1e-6) + self.check_with_place(place, True, 1e-10) if __name__ == "__main__":