From 3a53be006bacb8526147a19f949aea75728ae921 Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Wed, 2 Nov 2022 16:04:29 +0800 Subject: [PATCH] Revert "[Kernel Selection] Remove hard code of PADDLE_WITH_CUDA (#47325)" This reverts commit f913404589569d5076120fe2bf380446cccdc7e9. --- .../new_executor/interpreter/data_transfer.cc | 9 ------- .../interpreter/interpreter_util.cc | 9 ------- paddle/fluid/framework/operator.cc | 24 ----------------- paddle/fluid/imperative/execution_context.h | 3 +-- paddle/fluid/imperative/prepared_operator.cc | 9 ------- paddle/fluid/operators/activation_op.cc | 8 ------ paddle/fluid/operators/affine_grid_op.cc | 23 +++++++++++++--- paddle/fluid/operators/conv_transpose_op.cc | 27 +++++++++++++++++++ paddle/fluid/operators/grid_sampler_op.cc | 26 +++++++++++++++--- paddle/fluid/operators/pool_op.cc | 21 +++++++++++++-- .../sequence_ops/sequence_softmax_op.cc | 16 +++++++++++ paddle/fluid/operators/softmax_op.cc | 16 +++++++++++ .../platform/device/gpu/cuda/cudnn_helper.h | 4 +-- .../platform/device/gpu/rocm/miopen_helper.h | 4 +-- 14 files changed, 124 insertions(+), 75 deletions(-) diff --git a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc index 20ccdece426..bf51ebd1d48 100644 --- a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc +++ b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc @@ -22,9 +22,6 @@ #ifdef PADDLE_WITH_MKLDNN #include "paddle/phi/backends/onednn/onednn_context.h" #endif -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" -#endif namespace paddle { namespace framework { @@ -136,12 +133,6 @@ void DataTranferHelper::RunAndConstructOpFuncNode( auto* dev_ctx = pool.Get(place_); auto exec_ctx = ExecutionContext(*op, Scope(), *dev_ctx, runtime_context); auto expected_kernel_key = op_with_kernel->GetExpectedKernelType(exec_ctx); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (!op_with_kernel->DnnFallback() && - paddle::platform::CanCUDNNBeUsed(exec_ctx)) { - expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN; - } -#endif VLOG(6) << "expected_kernel_key " << expected_kernel_key << "\n"; VLOG(6) << "op_with_kernel Type() " << op_with_kernel->Type() << "\n"; diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc index 6c002d06b5b..3cb94a6470e 100644 --- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc +++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc @@ -32,9 +32,6 @@ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" -#endif PADDLE_DEFINE_EXPORTED_bool( new_executor_serial_run, @@ -621,12 +618,6 @@ void BuildOpFuncList(const platform::Place& place, *op_with_kernel, *runtime_scope, *dev_ctx, runtime_context); auto expected_kernel_key = op_with_kernel->GetExpectedKernelType(exec_ctx); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (!op_with_kernel->DnnFallback() && - paddle::platform::CanCUDNNBeUsed(exec_ctx)) { - expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN; - } -#endif VLOG(4) << "expected_kernel_key : " << expected_kernel_key; // change device by the device_guard() ApplyDeviceGuard(op, place, &expected_kernel_key); diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 5d24758de0b..b4714407686 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -58,10 +58,6 @@ class DenseTensor; #include "paddle/fluid/platform/device/mlu/mlu_info.h" #endif -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" -#endif - DECLARE_bool(benchmark); DECLARE_bool(check_nan_inf); DECLARE_bool(enable_unused_var_check); @@ -1413,14 +1409,6 @@ bool OperatorWithKernel::SupportsKernelType( } #endif -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (!this->DnnFallback() && paddle::platform::CanCUDNNBeUsed(exe_ctx)) { - auto tmp_kernel_type = kernel_type; - tmp_kernel_type.library_type_ = framework::LibraryType::kCUDNN; - return kernels.find(tmp_kernel_type) != kernels.end(); - } -#endif - return kernel_iter != kernels.end(); } @@ -1601,12 +1589,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope, } #endif -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (!this->DnnFallback() && paddle::platform::CanCUDNNBeUsed(exe_ctx)) { - kernel_type_->library_type_ = framework::LibraryType::kCUDNN; - } -#endif - // NOTE(Liu-xiandong):In my ctest, this branch do not be executed, // I can't understand it, it's really confusing. // But we still need to keep this to avoid errors. @@ -1850,12 +1832,6 @@ OpKernelType OperatorWithKernel::InnerGetExpectedKernelType( } #endif -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (!this->DnnFallback() && paddle::platform::CanCUDNNBeUsed(ctx)) { - expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN; - } -#endif - if (HasAttr("op_device")) { if (Attr("op_device") == "cpu") { expected_kernel_key.place_ = platform::CPUPlace(); diff --git a/paddle/fluid/imperative/execution_context.h b/paddle/fluid/imperative/execution_context.h index 4ac885dbe3f..6d4f7c347b0 100644 --- a/paddle/fluid/imperative/execution_context.h +++ b/paddle/fluid/imperative/execution_context.h @@ -103,8 +103,7 @@ class DygraphExecutionContext : public framework::ExecutionContext { bool HasAttr(const std::string& name) const override { if (attrs_.find(name) == attrs_.end()) { - return &default_attrs_ != nullptr && - default_attrs_.find(name) != default_attrs_.end(); + return default_attrs_.find(name) != default_attrs_.end(); } return true; } diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index 2a354742851..d76e06bd414 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -28,9 +28,6 @@ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_op_list.h" #endif -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" -#endif #include "paddle/fluid/framework/library_type.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/profiler/event_tracing.h" @@ -249,12 +246,6 @@ PreparedOp PrepareImpl( } #endif -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (!op.DnnFallback() && paddle::platform::CanCUDNNBeUsed(dygraph_exe_ctx)) { - expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN; - } -#endif - #if defined(PADDLE_WITH_XPU) bool is_xpu_unsupport = paddle::platform::is_xpu_place(expected_kernel_key.place_) && diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 41f813b4362..8c46bc01079 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -93,14 +93,6 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx, // library = framework::LibraryType::kCUDNN; // } // #endif - - // NOTE(jiahongyu): Activation ops have attribute use_cudnn, but cudnn kernels - // are temporarily disabled. Therefore, cudnn kernel also needs to fallback to - // plain GPU kernel temporarily. When above codes are uncommented, below - // fallback codes can be deleted safely. - if (paddle::platform::is_gpu_place(ctx.GetPlace())) { - oper.SetDnnFallback(true); - } return framework::OpKernelType(data_type, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/affine_grid_op.cc b/paddle/fluid/operators/affine_grid_op.cc index 2d7eb04f1db..8d123710e75 100644 --- a/paddle/fluid/operators/affine_grid_op.cc +++ b/paddle/fluid/operators/affine_grid_op.cc @@ -134,8 +134,15 @@ class AffineGridOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { + framework::LibraryType library{framework::LibraryType::kPlain}; +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (platform::CanCUDNNBeUsed(ctx)) { + library = framework::LibraryType::kCUDNN; + } +#endif auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Theta"); - return framework::OpKernelType(data_type, ctx.GetPlace()); + return framework::OpKernelType( + data_type, ctx.GetPlace(), phi::DataLayout::kAnyLayout, library); } }; @@ -245,9 +252,17 @@ class AffineGridOpGrad : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Output")); - return framework::OpKernelType(data_type, ctx.GetPlace()); + framework::LibraryType library_{framework::LibraryType::kPlain}; +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (platform::CanCUDNNBeUsed(ctx)) { + library_ = framework::LibraryType::kCUDNN; + } +#endif + return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Output")), + ctx.GetPlace(), + phi::DataLayout::kAnyLayout, + library_); } }; diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc index e9c4245bc47..f5702f21794 100644 --- a/paddle/fluid/operators/conv_transpose_op.cc +++ b/paddle/fluid/operators/conv_transpose_op.cc @@ -28,6 +28,9 @@ limitations under the License. */ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#endif namespace paddle { namespace operators { @@ -37,6 +40,14 @@ using DataLayout = phi::DataLayout; framework::OpKernelType ConvTransposeOp::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input"); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (platform::CanCUDNNBeUsed(ctx)) { + return framework::OpKernelType(data_type, + ctx.GetPlace(), + phi::DataLayout::kAnyLayout, + framework::LibraryType::kCUDNN); + } +#endif return framework::OpKernelType(data_type, ctx.GetPlace()); } @@ -257,6 +268,14 @@ Example: framework::OpKernelType ConvTransposeOpGrad::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input"); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (platform::CanCUDNNBeUsed(ctx)) { + return framework::OpKernelType(data_type, + ctx.GetPlace(), + phi::DataLayout::kAnyLayout, + framework::LibraryType::kCUDNN); + } +#endif return framework::OpKernelType(data_type, ctx.GetPlace()); } @@ -324,6 +343,14 @@ class ConvTransposeDoubleGradMaker : public framework::SingleGradOpMaker { framework::OpKernelType ConvTransposeOpDoubleGrad::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input"); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (platform::CanCUDNNBeUsed(ctx)) { + return framework::OpKernelType(data_type, + ctx.GetPlace(), + phi::DataLayout::kAnyLayout, + framework::LibraryType::kCUDNN); + } +#endif return framework::OpKernelType(data_type, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/grid_sampler_op.cc b/paddle/fluid/operators/grid_sampler_op.cc index 7f57d6e288f..77865647c4c 100644 --- a/paddle/fluid/operators/grid_sampler_op.cc +++ b/paddle/fluid/operators/grid_sampler_op.cc @@ -35,8 +35,17 @@ class GridSampleOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - return framework::OpKernelType(data_type, ctx.GetPlace()); + framework::LibraryType library_{framework::LibraryType::kPlain}; +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (platform::CanCUDNNBeUsed(ctx)) { + library_ = framework::LibraryType::kCUDNN; + } +#endif + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), + ctx.GetPlace(), + phi::DataLayout::kAnyLayout, + library_); } }; @@ -137,8 +146,17 @@ class GridSampleOpGrad : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - return framework::OpKernelType(data_type, ctx.GetPlace()); + framework::LibraryType library_{framework::LibraryType::kPlain}; +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (platform::CanCUDNNBeUsed(ctx)) { + library_ = framework::LibraryType::kCUDNN; + } +#endif + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), + ctx.GetPlace(), + phi::DataLayout::kAnyLayout, + library_); } }; diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index 48bfa3576ab..7842de9b17a 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -44,13 +44,21 @@ bool CanMKLDNNSupportPool(const framework::ExecutionContext& ctx) { framework::OpKernelType PoolOp::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { + framework::LibraryType library_{framework::LibraryType::kPlain}; + phi::DataLayout layout_ = phi::DataLayout::kAnyLayout; auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (platform::CanCUDNNBeUsed(ctx)) { + library_ = framework::LibraryType::kCUDNN; + } +#endif + // NOTE(jiahongyu): Below codes originally enclosed by PADDLE_WITH_MKLDNN this->SetDnnFallback(!CanMKLDNNSupportPool(ctx)); // NOTE(jiahongyu) END: Above codes originally enclosed by PADDLE_WITH_MKLDNN - return framework::OpKernelType(data_type, ctx.GetPlace()); + return framework::OpKernelType(data_type, ctx.GetPlace(), layout_, library_); } framework::OpKernelType PoolOp::GetKernelTypeForVar( @@ -78,13 +86,22 @@ framework::OpKernelType PoolOp::GetKernelTypeForVar( framework::OpKernelType PoolOpGrad::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { + framework::LibraryType library_{framework::LibraryType::kPlain}; + phi::DataLayout layout_ = phi::DataLayout::kAnyLayout; auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (platform::CanCUDNNBeUsed(ctx)) { + library_ = framework::LibraryType::kCUDNN; + } +#endif + // NOTE(jiahongyu): Below codes originally enclosed by PADDLE_WITH_MKLDNN this->SetDnnFallback(!CanMKLDNNSupportPool(ctx)); // NOTE(jiahongyu): Above codes originally enclosed by PADDLE_WITH_MKLDNN - return framework::OpKernelType(input_data_type, ctx.GetPlace()); + return framework::OpKernelType( + input_data_type, ctx.GetPlace(), layout_, library_); } framework::OpKernelType PoolOpGrad::GetKernelTypeForVar( diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc index 80f13a51ab0..5b4b9aef886 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc @@ -43,6 +43,14 @@ class SequenceSoftmaxOp : public framework::OperatorWithKernel { if (ctx.HasAttr("data_format")) { layout_ = phi::StringToDataLayout(ctx.Attr("data_format")); } +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (platform::CanCUDNNBeUsed(ctx)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + layout_, + framework::LibraryType::kCUDNN); + } +#endif return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_); } }; @@ -127,6 +135,14 @@ class SequenceSoftmaxGradOp : public framework::OperatorWithKernel { if (ctx.HasAttr("data_format")) { layout_ = phi::StringToDataLayout(ctx.Attr("data_format")); } +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (platform::CanCUDNNBeUsed(ctx)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + layout_, + framework::LibraryType::kCUDNN); + } +#endif return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_); } }; diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc index bc11f53e009..42e0e5250e0 100644 --- a/paddle/fluid/operators/softmax_op.cc +++ b/paddle/fluid/operators/softmax_op.cc @@ -48,6 +48,14 @@ class SoftmaxOp : public framework::OperatorWithKernel { platform::errors::InvalidArgument( "float16 can only be used on GPU/NPU/XPU/MLU and custom place")); } +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (platform::CanCUDNNBeUsed(ctx)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + layout_, + framework::LibraryType::kCUDNN); + } +#endif return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_); } }; @@ -132,6 +140,14 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel { PADDLE_THROW(platform::errors::InvalidArgument( "float16 can only be used on GPU/NPU/XPU/MLU and custom place")); } +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (platform::CanCUDNNBeUsed(ctx)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + layout_, + framework::LibraryType::kCUDNN); + } +#endif return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_); } }; diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h index 595f47d98e5..4fa25476336 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h +++ b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h @@ -617,8 +617,8 @@ class ScopedActivationDescriptor { }; inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) { - bool use_cudnn = paddle::platform::is_gpu_place(ctx.GetPlace()) && - ctx.HasAttr("use_cudnn") && ctx.Attr("use_cudnn"); + bool use_cudnn = ctx.HasAttr("use_cudnn") && ctx.Attr("use_cudnn"); + use_cudnn &= paddle::platform::is_gpu_place(ctx.GetPlace()); #ifdef PADDLE_WITH_CUDA if (use_cudnn) { auto& dev_ctx = ctx.device_context(); diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h b/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h index 019fdce9e04..0c9d6d24cd1 100644 --- a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h +++ b/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h @@ -554,8 +554,8 @@ class ScopedActivationDescriptor { }; inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) { - bool use_cudnn = paddle::platform::is_gpu_place(ctx.GetPlace()) && - ctx.HasAttr("use_cudnn") && ctx.Attr("use_cudnn"); + bool use_cudnn = ctx.HasAttr("use_cudnn") && ctx.Attr("use_cudnn"); + use_cudnn &= paddle::platform::is_gpu_place(ctx.GetPlace()); #ifdef PADDLE_WITH_HIP if (use_cudnn) { auto& dev_ctx = ctx.device_context(); -- GitLab