From f913404589569d5076120fe2bf380446cccdc7e9 Mon Sep 17 00:00:00 2001
From: HongyuJia
Date: Tue, 1 Nov 2022 20:51:45 +0800
Subject: [PATCH] [Kernel Selection] Remove hard code of PADDLE_WITH_CUDA
 (#47325)

* move cudnn hardcode outside GetExpectedKernelType
* add header file
* debug
* update interpreter_util with hardcode
* update interpreter_util headerfile
* solve activation hardcode
* debug with CI
* add mkldnn_op_list header file
* temporarily uncomment mkldnn
* temporarily uncomment mkldnn
* delete sequence_softmax cudnn hardcode
* add hardcode to data_transfer.cc
* update data_transfer headerfile
* try fix segment fault
* update cudnn&miopen_helper
* reset HasAttr of DygraphExctnCtx
* debug, this commit should pass all CI
* debug should pass CI, temporarily disable activation
* debug should pass CI
* fix default_attr=nullptr bug
* clean debug code
---
 .../new_executor/interpreter/data_transfer.cc |  9 +++++++
 .../interpreter/interpreter_util.cc           |  9 +++++++
 paddle/fluid/framework/operator.cc            | 24 +++++++++++++++++
 paddle/fluid/imperative/execution_context.h   |  3 ++-
 paddle/fluid/imperative/prepared_operator.cc  |  9 +++++++
 paddle/fluid/operators/activation_op.cc       |  8 ++++++
 paddle/fluid/operators/affine_grid_op.cc      | 23 +++------------
 paddle/fluid/operators/conv_transpose_op.cc   | 27 -------------------
 paddle/fluid/operators/grid_sampler_op.cc     | 26 +++---------------
 paddle/fluid/operators/pool_op.cc             | 21 ++-------------
 .../sequence_ops/sequence_softmax_op.cc       | 16 -----------
 paddle/fluid/operators/softmax_op.cc          | 16 -----------
 .../platform/device/gpu/cuda/cudnn_helper.h   |  4 +--
 .../platform/device/gpu/rocm/miopen_helper.h  |  4 +--
 14 files changed, 75 insertions(+), 124 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc
index bf51ebd1d4..20ccdece42 100644
--- a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc
+++ b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc
@@ -22,6 +22,9 @@
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/phi/backends/onednn/onednn_context.h"
 #endif
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
+#endif
 
 namespace paddle {
 namespace framework {
@@ -133,6 +136,12 @@ void DataTranferHelper::RunAndConstructOpFuncNode(
   auto* dev_ctx = pool.Get(place_);
   auto exec_ctx = ExecutionContext(*op, Scope(), *dev_ctx, runtime_context);
   auto expected_kernel_key = op_with_kernel->GetExpectedKernelType(exec_ctx);
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  if (!op_with_kernel->DnnFallback() &&
+      paddle::platform::CanCUDNNBeUsed(exec_ctx)) {
+    expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN;
+  }
+#endif
 
   VLOG(6) << "expected_kernel_key " << expected_kernel_key << "\n";
   VLOG(6) << "op_with_kernel Type() " << op_with_kernel->Type() << "\n";
diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
index 104217fa80..816331e3fa 100644
--- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
+++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
@@ -32,6 +32,9 @@
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
+#endif
 
 PADDLE_DEFINE_EXPORTED_bool(
     new_executor_serial_run,
@@ -615,6 +618,12 @@ void BuildOpFuncList(const platform::Place& place,
             *op_with_kernel, *runtime_scope, *dev_ctx, runtime_context);
         auto expected_kernel_key =
             op_with_kernel->GetExpectedKernelType(exec_ctx);
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+        if (!op_with_kernel->DnnFallback() &&
+            paddle::platform::CanCUDNNBeUsed(exec_ctx)) {
+          expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN;
+        }
+#endif
         VLOG(4) << "expected_kernel_key : " << expected_kernel_key;
         // change device by the device_guard()
         ApplyDeviceGuard(op, place, &expected_kernel_key);
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index b471440768..5d24758de0 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -58,6 +58,10 @@ class DenseTensor;
 #include "paddle/fluid/platform/device/mlu/mlu_info.h"
 #endif
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
+#endif
+
 DECLARE_bool(benchmark);
 DECLARE_bool(check_nan_inf);
 DECLARE_bool(enable_unused_var_check);
@@ -1409,6 +1413,14 @@ bool OperatorWithKernel::SupportsKernelType(
   }
 #endif
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  if (!this->DnnFallback() && paddle::platform::CanCUDNNBeUsed(exe_ctx)) {
+    auto tmp_kernel_type = kernel_type;
+    tmp_kernel_type.library_type_ = framework::LibraryType::kCUDNN;
+    return kernels.find(tmp_kernel_type) != kernels.end();
+  }
+#endif
+
   return kernel_iter != kernels.end();
 }
 
@@ -1589,6 +1601,12 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   }
 #endif
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  if (!this->DnnFallback() && paddle::platform::CanCUDNNBeUsed(exe_ctx)) {
+    kernel_type_->library_type_ = framework::LibraryType::kCUDNN;
+  }
+#endif
+
   // NOTE(Liu-xiandong):In my ctest, this branch do not be executed,
   // I can't understand it, it's really confusing.
   // But we still need to keep this to avoid errors.
@@ -1832,6 +1850,12 @@ OpKernelType OperatorWithKernel::InnerGetExpectedKernelType(
   }
 #endif
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  if (!this->DnnFallback() && paddle::platform::CanCUDNNBeUsed(ctx)) {
+    expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN;
+  }
+#endif
+
   if (HasAttr("op_device")) {
     if (Attr<std::string>("op_device") == "cpu") {
       expected_kernel_key.place_ = platform::CPUPlace();
diff --git a/paddle/fluid/imperative/execution_context.h b/paddle/fluid/imperative/execution_context.h
index 6d4f7c347b..4ac885dbe3 100644
--- a/paddle/fluid/imperative/execution_context.h
+++ b/paddle/fluid/imperative/execution_context.h
@@ -103,7 +103,8 @@ class DygraphExecutionContext : public framework::ExecutionContext {
 
   bool HasAttr(const std::string& name) const override {
     if (attrs_.find(name) == attrs_.end()) {
-      return default_attrs_.find(name) != default_attrs_.end();
+      return &default_attrs_ != nullptr &&
+             default_attrs_.find(name) != default_attrs_.end();
     }
     return true;
   }
diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc
index d76e06bd41..2a35474285 100644
--- a/paddle/fluid/imperative/prepared_operator.cc
+++ b/paddle/fluid/imperative/prepared_operator.cc
@@ -28,6 +28,9 @@
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_op_list.h"
 #endif
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
+#endif
 #include "paddle/fluid/framework/library_type.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
@@ -246,6 +249,12 @@ PreparedOp PrepareImpl(
   }
 #endif
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  if (!op.DnnFallback() && paddle::platform::CanCUDNNBeUsed(dygraph_exe_ctx)) {
+    expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN;
+  }
+#endif
+
 #if defined(PADDLE_WITH_XPU)
   bool is_xpu_unsupport =
       paddle::platform::is_xpu_place(expected_kernel_key.place_) &&
diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 6a239da553..b4cf9e9e00 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -93,6 +93,14 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx,
   //   library = framework::LibraryType::kCUDNN;
   // }
   // #endif
+
+  // NOTE(jiahongyu): Activation ops have attribute use_cudnn, but cudnn kernels
+  // are temporarily disabled. Therefore, cudnn kernel also needs to fallback to
+  // plain GPU kernel temporarily. When above codes are uncommented, below
+  // fallback codes can be deleted safely.
+  if (paddle::platform::is_gpu_place(ctx.GetPlace())) {
+    oper.SetDnnFallback(true);
+  }
   return framework::OpKernelType(data_type, ctx.GetPlace());
 }
 
diff --git a/paddle/fluid/operators/affine_grid_op.cc b/paddle/fluid/operators/affine_grid_op.cc
index 8d123710e7..2d7eb04f1d 100644
--- a/paddle/fluid/operators/affine_grid_op.cc
+++ b/paddle/fluid/operators/affine_grid_op.cc
@@ -134,15 +134,8 @@ class AffineGridOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    framework::LibraryType library{framework::LibraryType::kPlain};
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    if (platform::CanCUDNNBeUsed(ctx)) {
-      library = framework::LibraryType::kCUDNN;
-    }
-#endif
     auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Theta");
-    return framework::OpKernelType(
-        data_type, ctx.GetPlace(), phi::DataLayout::kAnyLayout, library);
+    return framework::OpKernelType(data_type, ctx.GetPlace());
   }
 };
 
@@ -252,17 +245,9 @@ class AffineGridOpGrad : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    framework::LibraryType library_{framework::LibraryType::kPlain};
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    if (platform::CanCUDNNBeUsed(ctx)) {
-      library_ = framework::LibraryType::kCUDNN;
-    }
-#endif
-    return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
-                                       ctx, framework::GradVarName("Output")),
-                                   ctx.GetPlace(),
-                                   phi::DataLayout::kAnyLayout,
-                                   library_);
+    auto data_type = OperatorWithKernel::IndicateVarDataType(
+        ctx, framework::GradVarName("Output"));
+    return framework::OpKernelType(data_type, ctx.GetPlace());
   }
 };
 
diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc
index f5702f2179..e9c4245bc4 100644
--- a/paddle/fluid/operators/conv_transpose_op.cc
+++ b/paddle/fluid/operators/conv_transpose_op.cc
@@ -28,9 +28,6 @@ limitations under the License. */
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
-#endif
 
 namespace paddle {
 namespace operators {
@@ -40,14 +37,6 @@ using DataLayout = phi::DataLayout;
 framework::OpKernelType ConvTransposeOp::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
   auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  if (platform::CanCUDNNBeUsed(ctx)) {
-    return framework::OpKernelType(data_type,
-                                   ctx.GetPlace(),
-                                   phi::DataLayout::kAnyLayout,
-                                   framework::LibraryType::kCUDNN);
-  }
-#endif
   return framework::OpKernelType(data_type, ctx.GetPlace());
 }
 
@@ -268,14 +257,6 @@ Example:
 framework::OpKernelType ConvTransposeOpGrad::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
   auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  if (platform::CanCUDNNBeUsed(ctx)) {
-    return framework::OpKernelType(data_type,
-                                   ctx.GetPlace(),
-                                   phi::DataLayout::kAnyLayout,
-                                   framework::LibraryType::kCUDNN);
-  }
-#endif
   return framework::OpKernelType(data_type, ctx.GetPlace());
 }
 
@@ -343,14 +324,6 @@ class ConvTransposeDoubleGradMaker : public framework::SingleGradOpMaker {
 framework::OpKernelType ConvTransposeOpDoubleGrad::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
   auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  if (platform::CanCUDNNBeUsed(ctx)) {
-    return framework::OpKernelType(data_type,
-                                   ctx.GetPlace(),
-                                   phi::DataLayout::kAnyLayout,
-                                   framework::LibraryType::kCUDNN);
-  }
-#endif
   return framework::OpKernelType(data_type, ctx.GetPlace());
 }
 
diff --git a/paddle/fluid/operators/grid_sampler_op.cc b/paddle/fluid/operators/grid_sampler_op.cc
index 77865647c4..7f57d6e288 100644
--- a/paddle/fluid/operators/grid_sampler_op.cc
+++ b/paddle/fluid/operators/grid_sampler_op.cc
@@ -35,17 +35,8 @@ class GridSampleOp : public framework::OperatorWithKernel {
 protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    framework::LibraryType library_{framework::LibraryType::kPlain};
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    if (platform::CanCUDNNBeUsed(ctx)) {
-      library_ = framework::LibraryType::kCUDNN;
-    }
-#endif
-    return framework::OpKernelType(
-        OperatorWithKernel::IndicateVarDataType(ctx, "X"),
-        ctx.GetPlace(),
-        phi::DataLayout::kAnyLayout,
-        library_);
+    auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
+    return framework::OpKernelType(data_type, ctx.GetPlace());
   }
 };
 
@@ -146,17 +137,8 @@ class GridSampleOpGrad : public framework::OperatorWithKernel {
 protected:
   framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
-    framework::LibraryType library_{framework::LibraryType::kPlain};
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    if (platform::CanCUDNNBeUsed(ctx)) {
-      library_ = framework::LibraryType::kCUDNN;
-    }
-#endif
-    return framework::OpKernelType(
-        OperatorWithKernel::IndicateVarDataType(ctx, "X"),
-        ctx.GetPlace(),
-        phi::DataLayout::kAnyLayout,
-        library_);
+    auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
+    return framework::OpKernelType(data_type, ctx.GetPlace());
   }
 };
diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc
index 7842de9b17..48bfa3576a 100644
--- a/paddle/fluid/operators/pool_op.cc
+++ b/paddle/fluid/operators/pool_op.cc
@@ -44,21 +44,13 @@ bool CanMKLDNNSupportPool(const framework::ExecutionContext& ctx) {
 
 framework::OpKernelType PoolOp::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
-  framework::LibraryType library_{framework::LibraryType::kPlain};
-  phi::DataLayout layout_ = phi::DataLayout::kAnyLayout;
   auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  if (platform::CanCUDNNBeUsed(ctx)) {
-    library_ = framework::LibraryType::kCUDNN;
-  }
-#endif
-
   // NOTE(jiahongyu): Below codes originally enclosed by PADDLE_WITH_MKLDNN
   this->SetDnnFallback(!CanMKLDNNSupportPool(ctx));
   // NOTE(jiahongyu) END: Above codes originally enclosed by PADDLE_WITH_MKLDNN
 
-  return framework::OpKernelType(data_type, ctx.GetPlace(), layout_, library_);
+  return framework::OpKernelType(data_type, ctx.GetPlace());
 }
 
 framework::OpKernelType PoolOp::GetKernelTypeForVar(
@@ -86,22 +78,13 @@ framework::OpKernelType PoolOp::GetKernelTypeForVar(
 
 framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
-  framework::LibraryType library_{framework::LibraryType::kPlain};
-  phi::DataLayout layout_ = phi::DataLayout::kAnyLayout;
   auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  if (platform::CanCUDNNBeUsed(ctx)) {
-    library_ = framework::LibraryType::kCUDNN;
-  }
-#endif
-
   // NOTE(jiahongyu): Below codes originally enclosed by PADDLE_WITH_MKLDNN
   this->SetDnnFallback(!CanMKLDNNSupportPool(ctx));
   // NOTE(jiahongyu): Above codes originally enclosed by PADDLE_WITH_MKLDNN
 
-  return framework::OpKernelType(
-      input_data_type, ctx.GetPlace(), layout_, library_);
+  return framework::OpKernelType(input_data_type, ctx.GetPlace());
 }
 
 framework::OpKernelType PoolOpGrad::GetKernelTypeForVar(
diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc
index 5b4b9aef88..80f13a51ab 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc
@@ -43,14 +43,6 @@ class SequenceSoftmaxOp : public framework::OperatorWithKernel {
     if (ctx.HasAttr("data_format")) {
       layout_ = phi::StringToDataLayout(ctx.Attr<std::string>("data_format"));
     }
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    if (platform::CanCUDNNBeUsed(ctx)) {
-      return framework::OpKernelType(input_data_type,
-                                     ctx.GetPlace(),
-                                     layout_,
-                                     framework::LibraryType::kCUDNN);
-    }
-#endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_);
   }
 };
@@ -135,14 +127,6 @@ class SequenceSoftmaxGradOp : public framework::OperatorWithKernel {
     if (ctx.HasAttr("data_format")) {
       layout_ = phi::StringToDataLayout(ctx.Attr<std::string>("data_format"));
     }
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    if (platform::CanCUDNNBeUsed(ctx)) {
-      return framework::OpKernelType(input_data_type,
-                                     ctx.GetPlace(),
-                                     layout_,
-                                     framework::LibraryType::kCUDNN);
-    }
-#endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_);
   }
 };
diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc
index 42e0e5250e..bc11f53e00 100644
--- a/paddle/fluid/operators/softmax_op.cc
+++ b/paddle/fluid/operators/softmax_op.cc
@@ -48,14 +48,6 @@ class SoftmaxOp : public framework::OperatorWithKernel {
           platform::errors::InvalidArgument(
               "float16 can only be used on GPU/NPU/XPU/MLU and custom place"));
     }
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    if (platform::CanCUDNNBeUsed(ctx)) {
-      return framework::OpKernelType(input_data_type,
-                                     ctx.GetPlace(),
-                                     layout_,
-                                     framework::LibraryType::kCUDNN);
-    }
-#endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_);
   }
 };
@@ -140,14 +132,6 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
       PADDLE_THROW(platform::errors::InvalidArgument(
          "float16 can only be used on GPU/NPU/XPU/MLU and custom place"));
     }
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    if (platform::CanCUDNNBeUsed(ctx)) {
-      return framework::OpKernelType(input_data_type,
-                                     ctx.GetPlace(),
-                                     layout_,
-                                     framework::LibraryType::kCUDNN);
-    }
-#endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_);
   }
 };
diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h
index 4fa2547633..595f47d98e 100644
--- a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h
+++ b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h
@@ -617,8 +617,8 @@ class ScopedActivationDescriptor {
 };
 
 inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
-  bool use_cudnn = ctx.HasAttr("use_cudnn") && ctx.Attr<bool>("use_cudnn");
-  use_cudnn &= paddle::platform::is_gpu_place(ctx.GetPlace());
+  bool use_cudnn = paddle::platform::is_gpu_place(ctx.GetPlace()) &&
+                   ctx.HasAttr("use_cudnn") && ctx.Attr<bool>("use_cudnn");
 #ifdef PADDLE_WITH_CUDA
   if (use_cudnn) {
     auto& dev_ctx = ctx.device_context();
diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h b/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h
index 0c9d6d24cd..019fdce9e0 100644
--- a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h
+++ b/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h
@@ -554,8 +554,8 @@ class ScopedActivationDescriptor {
 };
 
 inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
-  bool use_cudnn = ctx.HasAttr("use_cudnn") && ctx.Attr<bool>("use_cudnn");
-  use_cudnn &= paddle::platform::is_gpu_place(ctx.GetPlace());
+  bool use_cudnn = paddle::platform::is_gpu_place(ctx.GetPlace()) &&
+                   ctx.HasAttr("use_cudnn") && ctx.Attr<bool>("use_cudnn");
 #ifdef PADDLE_WITH_HIP
   if (use_cudnn) {
     auto& dev_ctx = ctx.device_context();
-- 
GitLab
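
Every hunk above applies the same pattern: the per-operator GetExpectedKernelType() implementations stop hard-coding the cuDNN branch behind PADDLE_WITH_CUDA, and a single shared check that runs after that call flips the chosen kernel's library to kCUDNN whenever the operator has not requested a DNN fallback and CanCUDNNBeUsed() holds. The standalone C++ sketch below illustrates that flow in isolation; all types and the ChooseKernel() helper are simplified stand-ins for this illustration, not the real Paddle declarations.

// Standalone sketch (simplified stand-in types, not the real Paddle API) of the
// centralized cuDNN kernel selection introduced by this patch.
#include <iostream>
#include <string>

enum class LibraryType { kPlain, kCUDNN };

// Stand-in for framework::ExecutionContext: just the facts the check needs.
struct ExecutionContext {
  bool has_use_cudnn_attr;  // the op declares a "use_cudnn" attribute
  bool use_cudnn_attr;      // the attribute's value
  bool on_gpu_place;        // the op runs on a GPU place
};

// Stand-in for framework::OpKernelType.
struct OpKernelType {
  std::string data_type;
  LibraryType library_type;
};

// Mirrors the reordered check in cudnn_helper.h/miopen_helper.h: test the place
// first so CPU-only runs never look at the attribute.
bool CanCUDNNBeUsed(const ExecutionContext& ctx) {
  return ctx.on_gpu_place && ctx.has_use_cudnn_attr && ctx.use_cudnn_attr;
}

// Stand-in for OperatorWithKernel: per-op code now only picks data type/place.
struct OperatorWithKernel {
  bool dnn_fallback = false;  // set via SetDnnFallback in ops that opt out

  OpKernelType GetExpectedKernelType(const ExecutionContext&) const {
    return {"float32", LibraryType::kPlain};  // no cuDNN branch here any more
  }
};

// The shared post-processing step the patch adds in operator.cc,
// prepared_operator.cc, data_transfer.cc and interpreter_util.cc.
OpKernelType ChooseKernel(const OperatorWithKernel& op,
                          const ExecutionContext& ctx) {
  OpKernelType expected = op.GetExpectedKernelType(ctx);
  if (!op.dnn_fallback && CanCUDNNBeUsed(ctx)) {
    expected.library_type = LibraryType::kCUDNN;
  }
  return expected;
}

int main() {
  ExecutionContext gpu_ctx{true, true, true};   // use_cudnn=true, on GPU
  ExecutionContext cpu_ctx{true, true, false};  // same attributes, on CPU

  OperatorWithKernel conv;  // op whose cuDNN kernel is still enabled
  OperatorWithKernel pool;
  pool.dnn_fallback = true;  // op that opted out of the DNN library

  std::cout << (ChooseKernel(conv, gpu_ctx).library_type == LibraryType::kCUDNN)
            << "\n";  // 1: cuDNN selected by the shared check
  std::cout << (ChooseKernel(conv, cpu_ctx).library_type == LibraryType::kCUDNN)
            << "\n";  // 0: plain kernel on CPU
  std::cout << (ChooseKernel(pool, gpu_ctx).library_type == LibraryType::kCUDNN)
            << "\n";  // 0: fallback flag wins
  return 0;
}

In the patch itself, that fallback flag is what pool_op.cc derives from CanMKLDNNSupportPool() and what activation_op.cc sets unconditionally on GPU places while its cuDNN kernels remain disabled.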