diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index ba1d7379c56d953a0f37d03deed6c47e46cbf129..16fc1721eb6f5d2517ad45289f2415ef41749df2 100644
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -68,6 +68,7 @@
 pass_library(transpose_flatten_concat_fuse_pass inference)
 pass_library(identity_scale_op_clean_pass base)
 pass_library(sync_batch_norm_pass base)
 pass_library(runtime_context_cache_pass base)
+pass_library(expected_kernel_cache_pass base)
 pass_library(quant_conv2d_dequant_fuse_pass inference)
 pass_library(fillconstant_elementwisemul_fuse inference)
diff --git a/paddle/fluid/framework/ir/expected_kernel_cache_pass.cc b/paddle/fluid/framework/ir/expected_kernel_cache_pass.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ee67af0aff5c90a9da0ece8f197d9a0c0a8e5b9c
--- /dev/null
+++ b/paddle/fluid/framework/ir/expected_kernel_cache_pass.cc
@@ -0,0 +1,37 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/ir/expected_kernel_cache_pass.h"
+#include <memory>
+#include "paddle/fluid/framework/operator.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+void ExpectedKernelCachePass::ApplyImpl(ir::Graph* graph) const {
+  VLOG(3) << "Applies Expected Kernel Cache strategy.";
+  for (const Node* n : graph->Nodes()) {
+    if (n->IsOp()) {
+      n->Op()->SetAttr(kEnableCacheExpectedKernel, true);
+    }
+  }
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(expected_kernel_cache_pass,
+              paddle::framework::ir::ExpectedKernelCachePass);
diff --git a/paddle/fluid/framework/ir/expected_kernel_cache_pass.h b/paddle/fluid/framework/ir/expected_kernel_cache_pass.h
new file mode 100644
index 0000000000000000000000000000000000000000..bf0907d3feb7bccd163363da65505e0af3fb9bf6
--- /dev/null
+++ b/paddle/fluid/framework/ir/expected_kernel_cache_pass.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <memory>
+#include "paddle/fluid/framework/ir/pass.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+class ExpectedKernelCachePass : public Pass {
+ protected:
+  void ApplyImpl(ir::Graph* graph) const override;
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 168f287a455c644695b6eaff426ce31ded8d38a5..0dfac96bfee868ad395366f4f8dd95e2c7796eb5 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -899,50 +899,23 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   auto* dev_ctx = pool.Get(place);
 
-  // check if op[type] has kernel registered.
-  auto& all_op_kernels = AllOpKernels();
-  auto kernels_iter = all_op_kernels.find(type_);
-  if (kernels_iter == all_op_kernels.end()) {
-    PADDLE_THROW(
-        "There are no kernels which are registered in the %s operator.", type_);
+  if (!HasAttr(kEnableCacheExpectedKernel) || !kernel_type_) {
+    ChooseKernel(*runtime_ctx, scope, place);
   }
 
-  OpKernelMap& kernels = kernels_iter->second;
-
-  auto expected_kernel_key = this->GetExpectedKernelType(
-      ExecutionContext(*this, scope, *dev_ctx, *runtime_ctx, nullptr));
-  VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
-
-  auto kernel_iter = kernels.find(expected_kernel_key);
-#ifdef PADDLE_WITH_MKLDNN
-  // workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set
-  if (kernel_iter == kernels.end() &&
-      expected_kernel_key.library_type_ == LibraryType::kMKLDNN) {
-    VLOG(3) << "missing MKLDNN kernel: fallbacking to PLAIN one";
-    expected_kernel_key.library_type_ = LibraryType::kPlain;
-    expected_kernel_key.data_layout_ = DataLayout::kAnyLayout;
-    kernel_iter = kernels.find(expected_kernel_key);
-  }
-#endif
-  if (kernel_iter == kernels.end()) {
-    PADDLE_THROW("op %s does not have kernel for %s", type_,
-                 KernelTypeToString(expected_kernel_key));
-  }
-
-  std::vector<KernelConfig>* kernel_configs =
-      GetKernelConfig(expected_kernel_key);
+  std::vector<KernelConfig>* kernel_configs = GetKernelConfig(*kernel_type_);
 
   // do data transformScope &transfer_scope;
   std::vector<std::string> transfered_inplace_vars;
-  auto* transfer_scope = PrepareData(scope, expected_kernel_key,
-                                     &transfered_inplace_vars, runtime_ctx);
+  auto* transfer_scope =
+      PrepareData(scope, *kernel_type_, &transfered_inplace_vars, runtime_ctx);
 
   // exec scope is the scope that kernel actually executed on.
   const Scope& exec_scope =
       (transfer_scope == nullptr ? scope : *transfer_scope);
 
-  if (!(expected_kernel_key.place_ == dev_ctx->GetPlace())) {
-    dev_ctx = pool.Get(expected_kernel_key.place_);
+  if (!(kernel_type_->place_ == dev_ctx->GetPlace())) {
+    dev_ctx = pool.Get(kernel_type_->place_);
   }
 
   if (!HasAttr(kAllKernelsMustComputeRuntimeShape)) {
@@ -951,8 +924,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   }
   // TODO(panyx0718): ExecutionContext should only depend on RuntimeContext
   // not Scope. Imperative mode only pass inputs and get outputs.
-  kernel_iter->second(ExecutionContext(*this, exec_scope, *dev_ctx,
-                                       *runtime_ctx, kernel_configs));
+  (*kernel_func_)(ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx,
+                                   kernel_configs));
 
   if (!transfered_inplace_vars.empty()) {
     // there is inplace variable has been transfered.
@@ -978,6 +951,46 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   }
 }
 
+void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx,
+                                      const Scope& scope,
+                                      const platform::Place& place) const {
+  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  auto* dev_ctx = pool.Get(place);
+
+  // check if op[type] has kernel registered.
+  auto& all_op_kernels = AllOpKernels();
+  auto kernels_iter = all_op_kernels.find(type_);
+  if (kernels_iter == all_op_kernels.end()) {
+    PADDLE_THROW(
+        "There are no kernels which are registered in the %s operator.", type_);
+  }
+
+  OpKernelMap& kernels = kernels_iter->second;
+
+  auto expected_kernel_key = this->GetExpectedKernelType(
+      ExecutionContext(*this, scope, *dev_ctx, ctx, nullptr));
+  VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
+
+  auto kernel_iter = kernels.find(expected_kernel_key);
+#ifdef PADDLE_WITH_MKLDNN
+  // workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set
+  if (kernel_iter == kernels.end() &&
+      expected_kernel_key.library_type_ == LibraryType::kMKLDNN) {
+    VLOG(3) << "missing MKLDNN kernel: fallbacking to PLAIN one";
+    expected_kernel_key.library_type_ = LibraryType::kPlain;
+    expected_kernel_key.data_layout_ = DataLayout::kAnyLayout;
+    kernel_iter = kernels.find(expected_kernel_key);
+  }
+#endif
+  if (kernel_iter == kernels.end()) {
+    PADDLE_THROW("op %s does not have kernel for %s", type_,
+                 KernelTypeToString(expected_kernel_key));
+  }
+
+  kernel_type_.reset(new OpKernelType(expected_kernel_key));
+  kernel_func_.reset(new OpKernelFunc(kernel_iter->second));
+}
+
 void OperatorWithKernel::TransferInplaceVarsBack(
     const Scope& scope, const std::vector<std::string>& inplace_vars,
     const Scope& transfer_scope) const {
diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index a02e53dcf764368601646a900833ac650c5bb31a..8c5649deaa8c2c0ed1e976a8453730541adbdb88 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -70,6 +70,12 @@ constexpr char kNewGradSuffix[] = "@NEWGRAD@";
 /// this Op's execution to save the elapsed time.
 constexpr char kEnableCacheRuntimeContext[] = "@ENABLE_CACHE_RUNTIME_CONTEXT@";
 
+/// If an Op has the attribute kEnableCacheExpectedKernel, it means that in the
+/// same name scope and on the same place, the expected kernel of this Op does
+/// not change during execution, so it can be recorded only at the first
+/// iteration of this Op's execution to save the elapsed time.
+constexpr char kEnableCacheExpectedKernel[] = "@ENABLE_CACHE_EXPECTED_KERNEL@";
+
 /// If an Op has this attribute, all its kernels should calculate output
 /// variable's shape in the corresponding Compute() function. And
 /// OperatorWithKernel::RunImpl() would skip call this Op's InferShape()
@@ -491,8 +497,13 @@
                                const std::vector<std::string>& inplace_vars,
                                const Scope& exec_scope) const;
 
+  void ChooseKernel(const RuntimeContext& ctx, const Scope& scope,
+                    const platform::Place& place) const;
+
  protected:
   mutable OpKernelConfigsMap kernel_configs_map_;
+  mutable std::unique_ptr<OpKernelType> kernel_type_;
+  mutable std::unique_ptr<OpKernelFunc> kernel_func_;
   mutable std::unique_ptr<RuntimeContext> runtime_ctx_;
   mutable const Scope* pre_scope_ = nullptr;
 };
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 0109b4a4fa7617880f642f5a39639bca38475515..b54ea269ff250f02b6331807237e10ee65b0b0b4 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -231,6 +231,7 @@ void AnalysisConfig::Update() {
       pass_builder()->InsertPass(3, "tensorrt_subgraph_pass");
     }
     pass_builder()->DeletePass("runtime_context_cache_pass");
+    pass_builder()->DeletePass("expected_kernel_cache_pass");
   }
 
   if (use_mkldnn_) {
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index 1a87ac378a232f153a994c6b11ff37f8f5419a36..de564dbb40b3fed8cb165e34877a8cdc3ee5e349 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -99,6 +99,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
         "conv_elementwise_add_fuse_pass",        //
 #endif                                           //
         "transpose_flatten_concat_fuse_pass",
+        "expected_kernel_cache_pass",            //
   });
 
   use_gpu_ = true;
@@ -136,6 +137,7 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
                 "conv_bn_fuse_pass",              //
                 "conv_eltwiseadd_bn_fuse_pass",   //
                 "is_test_pass",                   //
+                "expected_kernel_cache_pass",     //
   });
 
   use_gpu_ = false;
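
Usage note (not part of the patch): with expected_kernel_cache_pass appended to both CpuPassStrategy and GpuPassStrategy, every op in an analysed inference program carries the kEnableCacheExpectedKernel attribute, so OperatorWithKernel::RunImpl() calls ChooseKernel() only on the first iteration and reuses the cached kernel_type_/kernel_func_ afterwards. The minimal sketch below shows how a user of the C++ inference API could opt out again via the same pass_builder()->DeletePass() call this patch uses in AnalysisConfig::Update(); the header path, model directory, and the predictor-creation call are assumptions about the surrounding inference API, not something introduced by this patch.

```cpp
// Minimal sketch, assuming the AnalysisConfig / pass_builder() API touched
// above; the model directory is hypothetical.
#include <memory>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/inference_model");  // hypothetical model dir

  // The pass is on by default in the CPU/GPU pass strategies; remove it if
  // the expected kernel must be re-chosen on every run (e.g. for debugging).
  config.pass_builder()->DeletePass("expected_kernel_cache_pass");

  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}
```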