diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 3808dd5fbaeaa3671e50bb31ea532251a1e9f6fc..6974420db0421ffcd3dd84fb6d83697e0abf967d 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -69,6 +69,7 @@ pass_library(conv_affine_channel_fuse_pass inference) pass_library(transpose_flatten_concat_fuse_pass inference) pass_library(identity_scale_op_clean_pass base) pass_library(sync_batch_norm_pass base) +pass_library(runtime_context_cache_pass base) # There may be many transpose-flatten structures in a model, and the output of # these structures will be used as inputs to the concat Op. This pattern will diff --git a/paddle/fluid/framework/ir/runtime_context_cache_pass.cc b/paddle/fluid/framework/ir/runtime_context_cache_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..67b29512c4cf3512e4b2b4b5a18ba60a3d9120dc --- /dev/null +++ b/paddle/fluid/framework/ir/runtime_context_cache_pass.cc @@ -0,0 +1,39 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/framework/ir/runtime_context_cache_pass.h" +#include +#include "paddle/fluid/framework/operator.h" + +namespace paddle { +namespace framework { +namespace ir { + +std::unique_ptr RuntimeContextCachePass::ApplyImpl( + std::unique_ptr graph) const { + VLOG(3) << "Applies Runtime Context Cache strategy."; + for (const Node* n : graph->Nodes()) { + if (n->IsOp()) { + n->Op()->SetAttr(kEnableCacheRuntimeContext, true); + } + } + return graph; +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(runtime_context_cache_pass, + paddle::framework::ir::RuntimeContextCachePass); diff --git a/paddle/fluid/framework/ir/runtime_context_cache_pass.h b/paddle/fluid/framework/ir/runtime_context_cache_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..a6cf1a9ae5035f185dd3ab52bf0762a6eaf0f6e5 --- /dev/null +++ b/paddle/fluid/framework/ir/runtime_context_cache_pass.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/fluid/framework/ir/pass.h" + +namespace paddle { +namespace framework { +namespace ir { + +class RuntimeContextCachePass : public Pass { + protected: + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 44821aadf6d1f6777484dab5d25d01ff3b42596b..ab96201b3399c9daf3cf7e132d3f088a5ab41e7d 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -874,9 +874,23 @@ std::vector* OperatorWithKernel::GetKernelConfig( return kernel_configs; } +RuntimeContext* OperatorWithKernel::GetRuntimeContext( + const Scope& scope) const { + if (!HasAttr(kEnableCacheRuntimeContext)) { + return new RuntimeContext(Inputs(), Outputs(), scope); + } else { + const Scope* cur_scope = &scope; + if (!runtime_ctx_ || pre_scope_ != cur_scope) { + runtime_ctx_.reset(new RuntimeContext(Inputs(), Outputs(), scope)); + pre_scope_ = cur_scope; + } + return runtime_ctx_.get(); + } +} + void OperatorWithKernel::RunImpl(const Scope& scope, const platform::Place& place) const { - RuntimeContext ctx(Inputs(), Outputs(), scope); + auto runtime_ctx = GetRuntimeContext(scope); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto* dev_ctx = pool.Get(place); @@ -891,7 +905,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, OpKernelMap& kernels = kernels_iter->second; auto expected_kernel_key = this->GetExpectedKernelType( - ExecutionContext(*this, scope, *dev_ctx, ctx, nullptr)); + ExecutionContext(*this, scope, *dev_ctx, *runtime_ctx, nullptr)); VLOG(3) << "expected_kernel_key:" << expected_kernel_key; auto kernel_iter = kernels.find(expected_kernel_key); @@ -915,8 +929,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope, // do data transformScope &transfer_scope; std::vector transfered_inplace_vars; - auto* transfer_scope = - PrepareData(scope, expected_kernel_key, &transfered_inplace_vars, &ctx); + auto* transfer_scope = PrepareData(scope, expected_kernel_key, + &transfered_inplace_vars, runtime_ctx); // exec scope is the scope that kernel actually executed on. const Scope& exec_scope = @@ -927,13 +941,13 @@ void OperatorWithKernel::RunImpl(const Scope& scope, } if (!HasAttr(kAllKernelsMustComputeRuntimeShape)) { - RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, ctx); + RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, *runtime_ctx); this->InferShape(&infer_shape_ctx); } // TODO(panyx0718): ExecutionContext should only depend on RuntimeContext // not Scope. Imperative mode only pass inputs and get outputs. - kernel_iter->second( - ExecutionContext(*this, exec_scope, *dev_ctx, ctx, kernel_configs)); + kernel_iter->second(ExecutionContext(*this, exec_scope, *dev_ctx, + *runtime_ctx, kernel_configs)); if (!transfered_inplace_vars.empty()) { // there is inplace variable has been transfered. diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 822bf5c9ceaa31e1283fa3cf1dbe42a43894a5dd..ca5f0e27b368912bc6bd1554f3b0fbf05f9a147d 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -62,6 +62,14 @@ constexpr char kZeroVarSuffix[] = "@ZERO"; /// Variables with this suffix are the new Gradient. constexpr char kNewGradSuffix[] = "@NEWGRAD@"; +/// RuntimeContext is used to relate input/output names of Operator with +/// the corresponding variables in name scope. +/// If an Op has attribute kEnableCacheRuntimeContext, it means that in a same +/// name scope, since the input/output names of this Op do not change in the +/// execution, RuntimeContext could be created only at the first iteration of +/// this Op's execution to save the elapsed time. +constexpr char kEnableCacheRuntimeContext[] = "@ENABLE_CACHE_RUNTIME_CONTEXT@"; + /// If an Op has this attribute, all its kernels should calculate output /// variable's shape in the corresponding Compute() function. And /// OperatorWithKernel::RunImpl() would skip call this Op's InferShape() @@ -456,6 +464,7 @@ class OperatorWithKernel : public OperatorBase { // same. proto::VarType::Type IndicateDataType(const ExecutionContext& ctx) const; void RunImpl(const Scope& scope, const platform::Place& place) const final; + RuntimeContext* GetRuntimeContext(const Scope& scope) const; /** * Transfer data from scope to a transfered scope. If there is no data need to @@ -474,6 +483,8 @@ class OperatorWithKernel : public OperatorBase { protected: mutable OpKernelConfigsMap kernel_configs_map_; + mutable std::unique_ptr runtime_ctx_; + mutable const Scope* pre_scope_ = nullptr; }; extern bool OpSupportGPU(const std::string& op_type); diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 92526f4e74a217aa2cdfd43f258846ada54b9374..4cad8a9dfc3fc6ba06a28a1ad3a5e4d43ec38395 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -118,6 +118,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { CP_MEMBER(serialized_info_cache_); + // framework related. + CP_MEMBER(enable_runtime_context_cache_); + if (use_gpu_) { pass_builder_.reset(new GpuPassStrategy( *static_cast(other.pass_builder()))); @@ -232,6 +235,10 @@ void AnalysisConfig::Update() { if (ir_debug_) { pass_builder()->TurnOnDebug(); } + + if (enable_runtime_context_cache_) { + pass_builder()->AppendPass("runtime_context_cache_pass"); + } } std::string AnalysisConfig::SerializeInfoCache() { @@ -265,6 +272,7 @@ std::string AnalysisConfig::SerializeInfoCache() { ss << specify_input_name_; ss << cpu_math_library_num_threads_; + ss << enable_runtime_context_cache_; return ss.str(); } diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 9b05c335047d7f9a0c50004e4ff6817ddd53d80f..3b7faa54000a26310d0117fe6f1e68cc404c461a 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -194,6 +194,23 @@ struct AnalysisConfig { /** Tell whether the memory optimization is activated. */ bool enable_memory_optim() const; + // framework related + /** \brief Control whether to perform runtime context cache optimization. + * + * If turned off, in Op's every execution, RuntimeContext would be called to + * relate input/output names of this Op with the corresponding variables in + * Scope. + */ + void SwitchRuntimeContextCache(int x = true) { + enable_runtime_context_cache_ = x; + } + /** A boolean state tell whether the runtime context cache optimization is + * actived. + */ + bool runtime_context_cache_enabled() const { + return enable_runtime_context_cache_; + } + friend class ::paddle::AnalysisPredictor; /** NOTE just for developer, not an official API, easily to be broken. @@ -254,6 +271,15 @@ struct AnalysisConfig { int cpu_math_library_num_threads_{1}; + // framework related + // RuntimeContext is used to relate input/output names of Operator with + // the corresponding variables in Scope. + // If enable_runtime_context_cache_ is true, it means that in a same Scope, + // since the input/output names of this Op do not change in the execution, + // RuntimeContext could be created only at the first iteration of this Op's + // execution to save the elapsed time. + bool enable_runtime_context_cache_{false}; + // A runtime cache, shouldn't be transferred to others. std::string serialized_info_cache_; diff --git a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc index 5157bd280d0f3ee327d5cee7799477b5e6fd3f71..e1787a71775207d4d94f4005cffb82c2b24274e6 100644 --- a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc @@ -107,6 +107,7 @@ void SetConfig(AnalysisConfig *cfg) { cfg->DisableGpu(); cfg->SwitchSpecifyInputNames(); cfg->SwitchIrOptim(); + cfg->SwitchRuntimeContextCache(); if (FLAGS_zero_copy) { cfg->SwitchUseFeedFetchOps(false); } diff --git a/paddle/fluid/inference/tests/api/config_printer.h b/paddle/fluid/inference/tests/api/config_printer.h index b0c23fbd534847c8aad244749761e9c072148796..b7b39d4dd4675dd1bebec608914c2fe3153b360b 100644 --- a/paddle/fluid/inference/tests/api/config_printer.h +++ b/paddle/fluid/inference/tests/api/config_printer.h @@ -72,7 +72,8 @@ std::ostream &operator<<(std::ostream &os, const AnalysisConfig &config) { } os << GenSpaces(num_spaces) << "enable_ir_optim: " << config.ir_optim() << "\n"; - os << GenSpaces(num_spaces) << "enable_ir_optim: " << config.ir_optim() + os << GenSpaces(num_spaces) + << "use_runtime_context_cache: " << config.runtime_context_cache_enabled() << "\n"; os << GenSpaces(num_spaces) << "use_feed_fetch_ops: " << config.use_feed_fetch_ops_enabled() << "\n"; diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index 236afc77f708c344665821edd4f7c7841c300465..11e9725aeabf4472324d76aeb78c01f6be2e8c98 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -242,6 +242,10 @@ void BindAnalysisConfig(py::module *m) { .def("set_mkldnn_op", &AnalysisConfig::SetMKLDNNOp) .def("set_model_buffer", &AnalysisConfig::SetModelBuffer) .def("model_from_memory", &AnalysisConfig::model_from_memory) + .def("runtime_context_cache_enabled", + &AnalysisConfig::runtime_context_cache_enabled) + .def("switch_runtime_context_cache", + &AnalysisConfig::SwitchRuntimeContextCache, py::arg("x") = true) .def("pass_builder", &AnalysisConfig::pass_builder, py::return_value_policy::reference); }