diff --git a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc index bf51ebd1d48d761a8e3249e764c32ab18ef5cf29..20ccdece426c5698ff11075afa830e05ab9f5c7a 100644 --- a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc +++ b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc @@ -22,6 +22,9 @@ #ifdef PADDLE_WITH_MKLDNN #include "paddle/phi/backends/onednn/onednn_context.h" #endif +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#endif namespace paddle { namespace framework { @@ -133,6 +136,12 @@ void DataTranferHelper::RunAndConstructOpFuncNode( auto* dev_ctx = pool.Get(place_); auto exec_ctx = ExecutionContext(*op, Scope(), *dev_ctx, runtime_context); auto expected_kernel_key = op_with_kernel->GetExpectedKernelType(exec_ctx); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (!op_with_kernel->DnnFallback() && + paddle::platform::CanCUDNNBeUsed(exec_ctx)) { + expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN; + } +#endif VLOG(6) << "expected_kernel_key " << expected_kernel_key << "\n"; VLOG(6) << "op_with_kernel Type() " << op_with_kernel->Type() << "\n"; diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc index 104217fa80f223861647550d448499f33b94260b..816331e3fa54900c0e2e261e28a70169c8139d4c 100644 --- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc +++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc @@ -32,6 +32,9 @@ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#endif PADDLE_DEFINE_EXPORTED_bool( new_executor_serial_run, @@ -615,6 +618,12 @@ void BuildOpFuncList(const platform::Place& place, *op_with_kernel, *runtime_scope, *dev_ctx, runtime_context); auto expected_kernel_key = op_with_kernel->GetExpectedKernelType(exec_ctx); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (!op_with_kernel->DnnFallback() && + paddle::platform::CanCUDNNBeUsed(exec_ctx)) { + expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN; + } +#endif VLOG(4) << "expected_kernel_key : " << expected_kernel_key; // change device by the device_guard() ApplyDeviceGuard(op, place, &expected_kernel_key); diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index b4714407686d85d0ffd0d5986ce780bcea818415..5d24758de0b93eea75c395f084d05d6a399fc47a 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -58,6 +58,10 @@ class DenseTensor; #include "paddle/fluid/platform/device/mlu/mlu_info.h" #endif +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#endif + DECLARE_bool(benchmark); DECLARE_bool(check_nan_inf); DECLARE_bool(enable_unused_var_check); @@ -1409,6 +1413,14 @@ bool OperatorWithKernel::SupportsKernelType( } #endif +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (!this->DnnFallback() && paddle::platform::CanCUDNNBeUsed(exe_ctx)) { + auto tmp_kernel_type = kernel_type; + tmp_kernel_type.library_type_ = framework::LibraryType::kCUDNN; + return kernels.find(tmp_kernel_type) != kernels.end(); + } +#endif + return kernel_iter != kernels.end(); } @@ -1589,6 +1601,12 @@ void OperatorWithKernel::RunImpl(const Scope& scope, } #endif +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (!this->DnnFallback() && paddle::platform::CanCUDNNBeUsed(exe_ctx)) { + kernel_type_->library_type_ = framework::LibraryType::kCUDNN; + } +#endif + // NOTE(Liu-xiandong):In my ctest, this branch do not be executed, // I can't understand it, it's really confusing. // But we still need to keep this to avoid errors. @@ -1832,6 +1850,12 @@ OpKernelType OperatorWithKernel::InnerGetExpectedKernelType( } #endif +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (!this->DnnFallback() && paddle::platform::CanCUDNNBeUsed(ctx)) { + expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN; + } +#endif + if (HasAttr("op_device")) { if (Attr("op_device") == "cpu") { expected_kernel_key.place_ = platform::CPUPlace(); diff --git a/paddle/fluid/imperative/execution_context.h b/paddle/fluid/imperative/execution_context.h index 6d4f7c347b097d4da67a93f52c4b6f2aeba3f260..4ac885dbe3f9739f78f6fbf66b3b5e57859918d2 100644 --- a/paddle/fluid/imperative/execution_context.h +++ b/paddle/fluid/imperative/execution_context.h @@ -103,7 +103,8 @@ class DygraphExecutionContext : public framework::ExecutionContext { bool HasAttr(const std::string& name) const override { if (attrs_.find(name) == attrs_.end()) { - return default_attrs_.find(name) != default_attrs_.end(); + return &default_attrs_ != nullptr && + default_attrs_.find(name) != default_attrs_.end(); } return true; } diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index d76e06bd4143e29635610826be570cf83fbe02b5..2a35474285113bea751bf9ec848fcf9c2a282b6d 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -28,6 +28,9 @@ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_op_list.h" #endif +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#endif #include "paddle/fluid/framework/library_type.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/profiler/event_tracing.h" @@ -246,6 +249,12 @@ PreparedOp PrepareImpl( } #endif +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (!op.DnnFallback() && paddle::platform::CanCUDNNBeUsed(dygraph_exe_ctx)) { + expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN; + } +#endif + #if defined(PADDLE_WITH_XPU) bool is_xpu_unsupport = paddle::platform::is_xpu_place(expected_kernel_key.place_) && diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 6a239da553a58f2c4b7369671551e5464ad2d46f..b4cf9e9e009def25e53f690d914a38fbcb19abec 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -93,6 +93,14 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx, // library = framework::LibraryType::kCUDNN; // } // #endif + + // NOTE(jiahongyu): Activation ops have attribute use_cudnn, but cudnn kernels + // are temporarily disabled. Therefore, cudnn kernel also needs to fallback to + // plain GPU kernel temporarily. When above codes are uncommented, below + // fallback codes can be deleted safely. + if (paddle::platform::is_gpu_place(ctx.GetPlace())) { + oper.SetDnnFallback(true); + } return framework::OpKernelType(data_type, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/affine_grid_op.cc b/paddle/fluid/operators/affine_grid_op.cc index 8d123710e750e4ad8e7edf222a76afd534b2390d..2d7eb04f1dba0d0a54253a4a416f59643213d40a 100644 --- a/paddle/fluid/operators/affine_grid_op.cc +++ b/paddle/fluid/operators/affine_grid_op.cc @@ -134,15 +134,8 @@ class AffineGridOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - framework::LibraryType library{framework::LibraryType::kPlain}; -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (platform::CanCUDNNBeUsed(ctx)) { - library = framework::LibraryType::kCUDNN; - } -#endif auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Theta"); - return framework::OpKernelType( - data_type, ctx.GetPlace(), phi::DataLayout::kAnyLayout, library); + return framework::OpKernelType(data_type, ctx.GetPlace()); } }; @@ -252,17 +245,9 @@ class AffineGridOpGrad : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - framework::LibraryType library_{framework::LibraryType::kPlain}; -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (platform::CanCUDNNBeUsed(ctx)) { - library_ = framework::LibraryType::kCUDNN; - } -#endif - return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Output")), - ctx.GetPlace(), - phi::DataLayout::kAnyLayout, - library_); + auto data_type = OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Output")); + return framework::OpKernelType(data_type, ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc index f5702f2179431d0c0bf15b2f302cf091cc4dc822..e9c4245bc4731680ee16586dccde9e7f5e4a53dc 100644 --- a/paddle/fluid/operators/conv_transpose_op.cc +++ b/paddle/fluid/operators/conv_transpose_op.cc @@ -28,9 +28,6 @@ limitations under the License. */ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" -#endif namespace paddle { namespace operators { @@ -40,14 +37,6 @@ using DataLayout = phi::DataLayout; framework::OpKernelType ConvTransposeOp::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input"); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (platform::CanCUDNNBeUsed(ctx)) { - return framework::OpKernelType(data_type, - ctx.GetPlace(), - phi::DataLayout::kAnyLayout, - framework::LibraryType::kCUDNN); - } -#endif return framework::OpKernelType(data_type, ctx.GetPlace()); } @@ -268,14 +257,6 @@ Example: framework::OpKernelType ConvTransposeOpGrad::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input"); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (platform::CanCUDNNBeUsed(ctx)) { - return framework::OpKernelType(data_type, - ctx.GetPlace(), - phi::DataLayout::kAnyLayout, - framework::LibraryType::kCUDNN); - } -#endif return framework::OpKernelType(data_type, ctx.GetPlace()); } @@ -343,14 +324,6 @@ class ConvTransposeDoubleGradMaker : public framework::SingleGradOpMaker { framework::OpKernelType ConvTransposeOpDoubleGrad::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input"); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (platform::CanCUDNNBeUsed(ctx)) { - return framework::OpKernelType(data_type, - ctx.GetPlace(), - phi::DataLayout::kAnyLayout, - framework::LibraryType::kCUDNN); - } -#endif return framework::OpKernelType(data_type, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/grid_sampler_op.cc b/paddle/fluid/operators/grid_sampler_op.cc index 77865647c4c5bbfb3488f808bb69e5cd5a967c47..7f57d6e288f87a8a59f63bba70768d08882313f9 100644 --- a/paddle/fluid/operators/grid_sampler_op.cc +++ b/paddle/fluid/operators/grid_sampler_op.cc @@ -35,17 +35,8 @@ class GridSampleOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - framework::LibraryType library_{framework::LibraryType::kPlain}; -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (platform::CanCUDNNBeUsed(ctx)) { - library_ = framework::LibraryType::kCUDNN; - } -#endif - return framework::OpKernelType( - OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace(), - phi::DataLayout::kAnyLayout, - library_); + auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); + return framework::OpKernelType(data_type, ctx.GetPlace()); } }; @@ -146,17 +137,8 @@ class GridSampleOpGrad : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - framework::LibraryType library_{framework::LibraryType::kPlain}; -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (platform::CanCUDNNBeUsed(ctx)) { - library_ = framework::LibraryType::kCUDNN; - } -#endif - return framework::OpKernelType( - OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace(), - phi::DataLayout::kAnyLayout, - library_); + auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); + return framework::OpKernelType(data_type, ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index 7842de9b17a3bfc4ea769cf2b09f04ff6b641719..48bfa3576ab6c880c32b100ce25a1e082a0a229f 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -44,21 +44,13 @@ bool CanMKLDNNSupportPool(const framework::ExecutionContext& ctx) { framework::OpKernelType PoolOp::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { - framework::LibraryType library_{framework::LibraryType::kPlain}; - phi::DataLayout layout_ = phi::DataLayout::kAnyLayout; auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (platform::CanCUDNNBeUsed(ctx)) { - library_ = framework::LibraryType::kCUDNN; - } -#endif - // NOTE(jiahongyu): Below codes originally enclosed by PADDLE_WITH_MKLDNN this->SetDnnFallback(!CanMKLDNNSupportPool(ctx)); // NOTE(jiahongyu) END: Above codes originally enclosed by PADDLE_WITH_MKLDNN - return framework::OpKernelType(data_type, ctx.GetPlace(), layout_, library_); + return framework::OpKernelType(data_type, ctx.GetPlace()); } framework::OpKernelType PoolOp::GetKernelTypeForVar( @@ -86,22 +78,13 @@ framework::OpKernelType PoolOp::GetKernelTypeForVar( framework::OpKernelType PoolOpGrad::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { - framework::LibraryType library_{framework::LibraryType::kPlain}; - phi::DataLayout layout_ = phi::DataLayout::kAnyLayout; auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (platform::CanCUDNNBeUsed(ctx)) { - library_ = framework::LibraryType::kCUDNN; - } -#endif - // NOTE(jiahongyu): Below codes originally enclosed by PADDLE_WITH_MKLDNN this->SetDnnFallback(!CanMKLDNNSupportPool(ctx)); // NOTE(jiahongyu): Above codes originally enclosed by PADDLE_WITH_MKLDNN - return framework::OpKernelType( - input_data_type, ctx.GetPlace(), layout_, library_); + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } framework::OpKernelType PoolOpGrad::GetKernelTypeForVar( diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc index 5b4b9aef88637326d892e222ef3ec7e2ccf5084c..80f13a51ab0b1249bbbca9e39ace4c219aee65a6 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc @@ -43,14 +43,6 @@ class SequenceSoftmaxOp : public framework::OperatorWithKernel { if (ctx.HasAttr("data_format")) { layout_ = phi::StringToDataLayout(ctx.Attr("data_format")); } -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (platform::CanCUDNNBeUsed(ctx)) { - return framework::OpKernelType(input_data_type, - ctx.GetPlace(), - layout_, - framework::LibraryType::kCUDNN); - } -#endif return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_); } }; @@ -135,14 +127,6 @@ class SequenceSoftmaxGradOp : public framework::OperatorWithKernel { if (ctx.HasAttr("data_format")) { layout_ = phi::StringToDataLayout(ctx.Attr("data_format")); } -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (platform::CanCUDNNBeUsed(ctx)) { - return framework::OpKernelType(input_data_type, - ctx.GetPlace(), - layout_, - framework::LibraryType::kCUDNN); - } -#endif return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_); } }; diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc index 42e0e5250e0841794241785b4648466c4f32b359..bc11f53e009353092d353babcfa3725544b82276 100644 --- a/paddle/fluid/operators/softmax_op.cc +++ b/paddle/fluid/operators/softmax_op.cc @@ -48,14 +48,6 @@ class SoftmaxOp : public framework::OperatorWithKernel { platform::errors::InvalidArgument( "float16 can only be used on GPU/NPU/XPU/MLU and custom place")); } -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (platform::CanCUDNNBeUsed(ctx)) { - return framework::OpKernelType(input_data_type, - ctx.GetPlace(), - layout_, - framework::LibraryType::kCUDNN); - } -#endif return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_); } }; @@ -140,14 +132,6 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel { PADDLE_THROW(platform::errors::InvalidArgument( "float16 can only be used on GPU/NPU/XPU/MLU and custom place")); } -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (platform::CanCUDNNBeUsed(ctx)) { - return framework::OpKernelType(input_data_type, - ctx.GetPlace(), - layout_, - framework::LibraryType::kCUDNN); - } -#endif return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_); } }; diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h index 4fa25476336f67f52c0b1c4d490bbe744a54e9fe..595f47d98e56b1db5b6bcbb14a3dda82671e6616 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h +++ b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h @@ -617,8 +617,8 @@ class ScopedActivationDescriptor { }; inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) { - bool use_cudnn = ctx.HasAttr("use_cudnn") && ctx.Attr("use_cudnn"); - use_cudnn &= paddle::platform::is_gpu_place(ctx.GetPlace()); + bool use_cudnn = paddle::platform::is_gpu_place(ctx.GetPlace()) && + ctx.HasAttr("use_cudnn") && ctx.Attr("use_cudnn"); #ifdef PADDLE_WITH_CUDA if (use_cudnn) { auto& dev_ctx = ctx.device_context(); diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h b/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h index 0c9d6d24cd1bfa7ae101f20c4bb19cf936dbf8be..019fdce9e044b1579436788de7efa832f2b3481f 100644 --- a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h +++ b/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h @@ -554,8 +554,8 @@ class ScopedActivationDescriptor { }; inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) { - bool use_cudnn = ctx.HasAttr("use_cudnn") && ctx.Attr("use_cudnn"); - use_cudnn &= paddle::platform::is_gpu_place(ctx.GetPlace()); + bool use_cudnn = paddle::platform::is_gpu_place(ctx.GetPlace()) && + ctx.HasAttr("use_cudnn") && ctx.Attr("use_cudnn"); #ifdef PADDLE_WITH_HIP if (use_cudnn) { auto& dev_ctx = ctx.device_context();