Merge pull request #16301 from luotao1/runtime_context_pass

add runtime_context_cache_pass

Merge pull request #16301 from luotao1/runtime_context_pass
add runtime_context_cache_pass
a5124ee0 · Tao Luo · GitHub · 2de263a5 · 82af8031 · a5124ee0
7 changed file
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -70,6 +70,7 @@ pass_library(conv_affine_channel_fuse_pass inference)
 pass_library(transpose_flatten_concat_fuse_pass inference)
 pass_library(identity_scale_op_clean_pass base)
 pass_library(sync_batch_norm_pass base)
+pass_library(runtime_context_cache_pass base)
 # There may be many transpose-flatten structures in a model, and the output of
 # these structures will be used as inputs to the concat Op. This pattern will

--- a/paddle/fluid/framework/ir/runtime_context_cache_pass.cc
+++ b/paddle/fluid/framework/ir/runtime_context_cache_pass.cc
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/framework/ir/runtime_context_cache_pass.h"
+#include <memory>
+#include "paddle/fluid/framework/operator.h"
+namespace paddle {
+namespace framework {
+namespace ir {
+std::unique_ptr<ir::Graph> RuntimeContextCachePass::ApplyImpl(
+    std::unique_ptr<ir::Graph> graph) const {
+  VLOG(3) << "Applies Runtime Context Cache strategy.";
+  for (const Node* n : graph->Nodes()) {
+    if (n->IsOp()) {
+      n->Op()->SetAttr(kEnableCacheRuntimeContext, true);
+    }
+  }
+  return graph;
+}
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+REGISTER_PASS(runtime_context_cache_pass,
+              paddle::framework::ir::RuntimeContextCachePass);
--- a/paddle/fluid/framework/ir/runtime_context_cache_pass.h
+++ b/paddle/fluid/framework/ir/runtime_context_cache_pass.h
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <memory>
+#include "paddle/fluid/framework/ir/pass.h"
+namespace paddle {
+namespace framework {
+namespace ir {
+class RuntimeContextCachePass : public Pass {
+ protected:
+  std::unique_ptr<ir::Graph> ApplyImpl(
+      std::unique_ptr<ir::Graph> graph) const override;
+};
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -876,7 +876,22 @@ std::vector<KernelConfig>* OperatorWithKernel::GetKernelConfig(
 void OperatorWithKernel::RunImpl(const Scope& scope,
                                 const platform::Place& place) const {
+  if (!HasAttr(kEnableCacheRuntimeContext)) {
    RuntimeContext ctx(Inputs(), Outputs(), scope);
+    RunImpl(scope, place, &ctx);
+  } else {
+    const Scope* cur_scope = &scope;
+    if (!runtime_ctx_ || pre_scope_ != cur_scope) {
+      runtime_ctx_.reset(new RuntimeContext(Inputs(), Outputs(), scope));
+      pre_scope_ = cur_scope;
+    }
+    RunImpl(scope, place, runtime_ctx_.get());
+  }
+}
+void OperatorWithKernel::RunImpl(const Scope& scope,
+                                 const platform::Place& place,
+                                 RuntimeContext* runtime_ctx) const {
  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
  auto* dev_ctx = pool.Get(place);
@@ -891,7 +906,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
  OpKernelMap& kernels = kernels_iter->second;
  auto expected_kernel_key = this->GetExpectedKernelType(
-      ExecutionContext(*this, scope, *dev_ctx, ctx, nullptr));
+      ExecutionContext(*this, scope, *dev_ctx, *runtime_ctx, nullptr));
  VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
  auto kernel_iter = kernels.find(expected_kernel_key);
@@ -915,8 +930,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
  // do data transformScope &transfer_scope;
  std::vector<std::string> transfered_inplace_vars;
-  auto* transfer_scope =
+  auto* transfer_scope = PrepareData(scope, expected_kernel_key,
-      PrepareData(scope, expected_kernel_key, &transfered_inplace_vars, &ctx);
+                                     &transfered_inplace_vars, runtime_ctx);
  // exec scope is the scope that kernel actually executed on.
  const Scope& exec_scope =
@@ -927,13 +942,13 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
  }
  if (!HasAttr(kAllKernelsMustComputeRuntimeShape)) {
-    RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, ctx);
+    RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, *runtime_ctx);
    this->InferShape(&infer_shape_ctx);
  }
  // TODO(panyx0718): ExecutionContext should only depend on RuntimeContext
  // not Scope. Imperative mode only pass inputs and get outputs.
-  kernel_iter->second(
+  kernel_iter->second(ExecutionContext(*this, exec_scope, *dev_ctx,
-      ExecutionContext(*this, exec_scope, *dev_ctx, ctx, kernel_configs));
+                                       *runtime_ctx, kernel_configs));
  if (!transfered_inplace_vars.empty()) {
    // there is inplace variable has been transfered.

--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -62,6 +62,14 @@ constexpr char kZeroVarSuffix[] = "@ZERO";
 /// Variables with this suffix are the new Gradient.
 constexpr char kNewGradSuffix[] = "@NEWGRAD@";
+/// RuntimeContext is used to relate input/output names of Operator with
+/// the corresponding variables in name scope.
+/// If an Op has attribute kEnableCacheRuntimeContext, it means that in a same
+/// name scope, since the input/output names of this Op do not change in the
+/// execution, RuntimeContext could be created only at the first iteration of
+/// this Op's execution to save the elapsed time.
+constexpr char kEnableCacheRuntimeContext[] = "@ENABLE_CACHE_RUNTIME_CONTEXT@";
 /// If an Op has this attribute, all its kernels should calculate output
 /// variable's shape in the corresponding Compute() function. And
 /// OperatorWithKernel::RunImpl() would skip call this Op's InferShape()
@@ -456,6 +464,8 @@ class OperatorWithKernel : public OperatorBase {
  // same.
  proto::VarType::Type IndicateDataType(const ExecutionContext& ctx) const;
  void RunImpl(const Scope& scope, const platform::Place& place) const final;
+  void RunImpl(const Scope& scope, const platform::Place& place,
+               RuntimeContext* runtime_ctx) const;
  /**
   * Transfer data from scope to a transfered scope. If there is no data need to
@@ -474,6 +484,8 @@ class OperatorWithKernel : public OperatorBase {
 protected:
  mutable OpKernelConfigsMap kernel_configs_map_;
+  mutable std::unique_ptr<RuntimeContext> runtime_ctx_;
+  mutable const Scope* pre_scope_ = nullptr;
 };
 extern bool OpSupportGPU(const std::string& op_type);

--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -202,6 +202,7 @@ void AnalysisConfig::Update() {
      // Append after the Affine_channel_conv_fuse pass.
      pass_builder()->InsertPass(3, "tensorrt_subgraph_pass");
    }
+    pass_builder()->DeletePass("runtime_context_cache_pass");
  }
  if (use_mkldnn_) {

--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -80,6 +80,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
        "conv_elementwise_add_act_fuse_pass",   //
        "conv_elementwise_add2_act_fuse_pass",  //
        "conv_elementwise_add_fuse_pass",       //
+        "runtime_context_cache_pass",           //
 #endif
  });
@@ -115,6 +116,7 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
      "conv_eltwiseadd_bn_fuse_pass",  //
      "is_test_pass",                  //
      "identity_scale_op_clean_pass",  //
+      "runtime_context_cache_pass",    //
  });
  use_gpu_ = false;
 }