diff --git a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc
index 20ccdece426c5698ff11075afa830e05ab9f5c7a..bf51ebd1d48d761a8e3249e764c32ab18ef5cf29 100644
--- a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc
+++ b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc
@@ -22,9 +22,6 @@
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/phi/backends/onednn/onednn_context.h"
 #endif
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
-#endif
 
 namespace paddle {
 namespace framework {
@@ -136,12 +133,6 @@ void DataTranferHelper::RunAndConstructOpFuncNode(
   auto* dev_ctx = pool.Get(place_);
   auto exec_ctx = ExecutionContext(*op, Scope(), *dev_ctx, runtime_context);
   auto expected_kernel_key = op_with_kernel->GetExpectedKernelType(exec_ctx);
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  if (!op_with_kernel->DnnFallback() &&
-      paddle::platform::CanCUDNNBeUsed(exec_ctx)) {
-    expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN;
-  }
-#endif
   VLOG(6) << "expected_kernel_key " << expected_kernel_key << "\n";
   VLOG(6) << "op_with_kernel Type() " << op_with_kernel->Type() << "\n";
 
diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
index 6c002d06b5b193ed0bcf89539313b78ba475e095..3cb94a6470ef51165c9aa133571e2645bfde02e4 100644
--- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
+++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
@@ -32,9 +32,6 @@
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
-#endif
 
 PADDLE_DEFINE_EXPORTED_bool(
     new_executor_serial_run,
@@ -621,12 +618,6 @@ void BuildOpFuncList(const platform::Place& place,
             *op_with_kernel, *runtime_scope, *dev_ctx, runtime_context);
         auto expected_kernel_key =
             op_with_kernel->GetExpectedKernelType(exec_ctx);
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-        if (!op_with_kernel->DnnFallback() &&
-            paddle::platform::CanCUDNNBeUsed(exec_ctx)) {
-          expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN;
-        }
-#endif
         VLOG(4) << "expected_kernel_key : " << expected_kernel_key;
         // change device by the device_guard()
         ApplyDeviceGuard(op, place, &expected_kernel_key);
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 5d24758de0b93eea75c395f084d05d6a399fc47a..b4714407686d85d0ffd0d5986ce780bcea818415 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -58,10 +58,6 @@ class DenseTensor;
 #include "paddle/fluid/platform/device/mlu/mlu_info.h"
 #endif
 
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
-#endif
-
 DECLARE_bool(benchmark);
 DECLARE_bool(check_nan_inf);
 DECLARE_bool(enable_unused_var_check);
@@ -1413,14 +1409,6 @@ bool OperatorWithKernel::SupportsKernelType(
   }
 #endif
 
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  if (!this->DnnFallback() && paddle::platform::CanCUDNNBeUsed(exe_ctx)) {
-    auto tmp_kernel_type = kernel_type;
-    tmp_kernel_type.library_type_ = framework::LibraryType::kCUDNN;
-    return kernels.find(tmp_kernel_type) != kernels.end();
-  }
-#endif
-
   return kernel_iter != kernels.end();
 }
 
@@ -1601,12 +1589,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   }
 #endif
 
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  if (!this->DnnFallback() && paddle::platform::CanCUDNNBeUsed(exe_ctx)) {
-    kernel_type_->library_type_ = framework::LibraryType::kCUDNN;
-  }
-#endif
-
   // NOTE(Liu-xiandong):In my ctest, this branch do not be executed,
   // I can't understand it, it's really confusing.
   // But we still need to keep this to avoid errors.
@@ -1850,12 +1832,6 @@ OpKernelType OperatorWithKernel::InnerGetExpectedKernelType(
   }
 #endif
 
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  if (!this->DnnFallback() && paddle::platform::CanCUDNNBeUsed(ctx)) {
-    expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN;
-  }
-#endif
-
   if (HasAttr("op_device")) {
     if (Attr<std::string>("op_device") == "cpu") {
       expected_kernel_key.place_ = platform::CPUPlace();
diff --git a/paddle/fluid/imperative/execution_context.h b/paddle/fluid/imperative/execution_context.h
index 4ac885dbe3f9739f78f6fbf66b3b5e57859918d2..6d4f7c347b097d4da67a93f52c4b6f2aeba3f260 100644
--- a/paddle/fluid/imperative/execution_context.h
+++ b/paddle/fluid/imperative/execution_context.h
@@ -103,8 +103,7 @@ class DygraphExecutionContext : public framework::ExecutionContext {
   bool HasAttr(const std::string& name) const override {
     if (attrs_.find(name) == attrs_.end()) {
-      return &default_attrs_ != nullptr &&
-             default_attrs_.find(name) != default_attrs_.end();
+      return default_attrs_.find(name) != default_attrs_.end();
     }
     return true;
   }
 
diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc
index 2a35474285113bea751bf9ec848fcf9c2a282b6d..d76e06bd4143e29635610826be570cf83fbe02b5 100644
--- a/paddle/fluid/imperative/prepared_operator.cc
+++ b/paddle/fluid/imperative/prepared_operator.cc
@@ -28,9 +28,6 @@
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_op_list.h"
 #endif
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
-#endif
 #include "paddle/fluid/framework/library_type.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
@@ -249,12 +246,6 @@ PreparedOp PrepareImpl(
   }
 #endif
 
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  if (!op.DnnFallback() && paddle::platform::CanCUDNNBeUsed(dygraph_exe_ctx)) {
-    expected_kernel_key.library_type_ = framework::LibraryType::kCUDNN;
-  }
-#endif
-
 #if defined(PADDLE_WITH_XPU)
   bool is_xpu_unsupport =
       paddle::platform::is_xpu_place(expected_kernel_key.place_) &&
diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 41f813b436293e102a7680e1603a77beaa2b9f31..8c46bc01079aded7c7c5a47d21831aa66d7ddd63 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -93,14 +93,6 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx,
   //   library = framework::LibraryType::kCUDNN;
   // }
   // #endif
-
-  // NOTE(jiahongyu): Activation ops have attribute use_cudnn, but cudnn kernels
-  // are temporarily disabled. Therefore, cudnn kernel also needs to fallback to
-  // plain GPU kernel temporarily. When above codes are uncommented, below
-  // fallback codes can be deleted safely.
-  if (paddle::platform::is_gpu_place(ctx.GetPlace())) {
-    oper.SetDnnFallback(true);
-  }
   return framework::OpKernelType(data_type, ctx.GetPlace());
 }
 
diff --git a/paddle/fluid/operators/affine_grid_op.cc b/paddle/fluid/operators/affine_grid_op.cc
index 2d7eb04f1dba0d0a54253a4a416f59643213d40a..8d123710e750e4ad8e7edf222a76afd534b2390d 100644
--- a/paddle/fluid/operators/affine_grid_op.cc
+++ b/paddle/fluid/operators/affine_grid_op.cc
@@ -134,8 +134,15 @@ class AffineGridOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
+    framework::LibraryType library{framework::LibraryType::kPlain};
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    if (platform::CanCUDNNBeUsed(ctx)) {
+      library = framework::LibraryType::kCUDNN;
+    }
+#endif
     auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Theta");
-    return framework::OpKernelType(data_type, ctx.GetPlace());
+    return framework::OpKernelType(
+        data_type, ctx.GetPlace(), phi::DataLayout::kAnyLayout, library);
   }
 };
 
@@ -245,9 +252,17 @@ class AffineGridOpGrad : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    auto data_type = OperatorWithKernel::IndicateVarDataType(
-        ctx, framework::GradVarName("Output"));
-    return framework::OpKernelType(data_type, ctx.GetPlace());
+    framework::LibraryType library_{framework::LibraryType::kPlain};
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    if (platform::CanCUDNNBeUsed(ctx)) {
+      library_ = framework::LibraryType::kCUDNN;
+    }
+#endif
+    return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
+                                       ctx, framework::GradVarName("Output")),
+                                   ctx.GetPlace(),
+                                   phi::DataLayout::kAnyLayout,
+                                   library_);
   }
 };
 
diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc
index e9c4245bc4731680ee16586dccde9e7f5e4a53dc..f5702f2179431d0c0bf15b2f302cf091cc4dc822 100644
--- a/paddle/fluid/operators/conv_transpose_op.cc
+++ b/paddle/fluid/operators/conv_transpose_op.cc
@@ -28,6 +28,9 @@ limitations under the License. */
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
+#endif
 
 namespace paddle {
 namespace operators {
@@ -37,6 +40,14 @@ using DataLayout = phi::DataLayout;
 
 framework::OpKernelType ConvTransposeOp::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
   auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  if (platform::CanCUDNNBeUsed(ctx)) {
+    return framework::OpKernelType(data_type,
+                                   ctx.GetPlace(),
+                                   phi::DataLayout::kAnyLayout,
+                                   framework::LibraryType::kCUDNN);
+  }
+#endif
   return framework::OpKernelType(data_type, ctx.GetPlace());
 }
 
@@ -257,6 +268,14 @@ Example:
 
 framework::OpKernelType ConvTransposeOpGrad::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
   auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  if (platform::CanCUDNNBeUsed(ctx)) {
+    return framework::OpKernelType(data_type,
+                                   ctx.GetPlace(),
+                                   phi::DataLayout::kAnyLayout,
+                                   framework::LibraryType::kCUDNN);
+  }
+#endif
   return framework::OpKernelType(data_type, ctx.GetPlace());
 }
 
@@ -324,6 +343,14 @@ class ConvTransposeDoubleGradMaker : public framework::SingleGradOpMaker {
 
 framework::OpKernelType ConvTransposeOpDoubleGrad::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
   auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  if (platform::CanCUDNNBeUsed(ctx)) {
+    return framework::OpKernelType(data_type,
+                                   ctx.GetPlace(),
+                                   phi::DataLayout::kAnyLayout,
+                                   framework::LibraryType::kCUDNN);
+  }
+#endif
   return framework::OpKernelType(data_type, ctx.GetPlace());
 }
 
diff --git a/paddle/fluid/operators/grid_sampler_op.cc b/paddle/fluid/operators/grid_sampler_op.cc
index 7f57d6e288f87a8a59f63bba70768d08882313f9..77865647c4c5bbfb3488f808bb69e5cd5a967c47 100644
--- a/paddle/fluid/operators/grid_sampler_op.cc
+++ b/paddle/fluid/operators/grid_sampler_op.cc
@@ -35,8 +35,17 @@ class GridSampleOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
-    return framework::OpKernelType(data_type, ctx.GetPlace());
+    framework::LibraryType library_{framework::LibraryType::kPlain};
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    if (platform::CanCUDNNBeUsed(ctx)) {
+      library_ = framework::LibraryType::kCUDNN;
+    }
+#endif
+    return framework::OpKernelType(
+        OperatorWithKernel::IndicateVarDataType(ctx, "X"),
+        ctx.GetPlace(),
+        phi::DataLayout::kAnyLayout,
+        library_);
   }
 };
 
@@ -137,8 +146,17 @@ class GridSampleOpGrad : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
-    return framework::OpKernelType(data_type, ctx.GetPlace());
+    framework::LibraryType library_{framework::LibraryType::kPlain};
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    if (platform::CanCUDNNBeUsed(ctx)) {
+      library_ = framework::LibraryType::kCUDNN;
+    }
+#endif
+    return framework::OpKernelType(
+        OperatorWithKernel::IndicateVarDataType(ctx, "X"),
+        ctx.GetPlace(),
+        phi::DataLayout::kAnyLayout,
+        library_);
   }
 };
 
diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc
index 48bfa3576ab6c880c32b100ce25a1e082a0a229f..7842de9b17a3bfc4ea769cf2b09f04ff6b641719 100644
--- a/paddle/fluid/operators/pool_op.cc
+++ b/paddle/fluid/operators/pool_op.cc
@@ -44,13 +44,21 @@ bool CanMKLDNNSupportPool(const framework::ExecutionContext& ctx) {
 
 framework::OpKernelType PoolOp::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
+  framework::LibraryType library_{framework::LibraryType::kPlain};
+  phi::DataLayout layout_ = phi::DataLayout::kAnyLayout;
   auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  if (platform::CanCUDNNBeUsed(ctx)) {
+    library_ = framework::LibraryType::kCUDNN;
+  }
+#endif
+
   // NOTE(jiahongyu): Below codes originally enclosed by PADDLE_WITH_MKLDNN
   this->SetDnnFallback(!CanMKLDNNSupportPool(ctx));
   // NOTE(jiahongyu) END: Above codes originally enclosed by PADDLE_WITH_MKLDNN
 
-  return framework::OpKernelType(data_type, ctx.GetPlace());
+  return framework::OpKernelType(data_type, ctx.GetPlace(), layout_, library_);
 }
 
 framework::OpKernelType PoolOp::GetKernelTypeForVar(
@@ -78,13 +86,22 @@ framework::OpKernelType PoolOp::GetKernelTypeForVar(
 
 framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
+  framework::LibraryType library_{framework::LibraryType::kPlain};
+  phi::DataLayout layout_ = phi::DataLayout::kAnyLayout;
   auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  if (platform::CanCUDNNBeUsed(ctx)) {
+    library_ = framework::LibraryType::kCUDNN;
+  }
+#endif
+
   // NOTE(jiahongyu): Below codes originally enclosed by PADDLE_WITH_MKLDNN
   this->SetDnnFallback(!CanMKLDNNSupportPool(ctx));
   // NOTE(jiahongyu): Above codes originally enclosed by PADDLE_WITH_MKLDNN
 
-  return framework::OpKernelType(input_data_type, ctx.GetPlace());
+  return framework::OpKernelType(
+      input_data_type, ctx.GetPlace(), layout_, library_);
 }
 
 framework::OpKernelType PoolOpGrad::GetKernelTypeForVar(
diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc
index 80f13a51ab0b1249bbbca9e39ace4c219aee65a6..5b4b9aef88637326d892e222ef3ec7e2ccf5084c 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc
@@ -43,6 +43,14 @@ class SequenceSoftmaxOp : public framework::OperatorWithKernel {
     if (ctx.HasAttr("data_format")) {
       layout_ = phi::StringToDataLayout(ctx.Attr<std::string>("data_format"));
     }
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    if (platform::CanCUDNNBeUsed(ctx)) {
+      return framework::OpKernelType(input_data_type,
+                                     ctx.GetPlace(),
+                                     layout_,
+                                     framework::LibraryType::kCUDNN);
+    }
+#endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_);
   }
 };
@@ -127,6 +135,14 @@ class SequenceSoftmaxGradOp : public framework::OperatorWithKernel {
     if (ctx.HasAttr("data_format")) {
      layout_ = phi::StringToDataLayout(ctx.Attr<std::string>("data_format"));
     }
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    if (platform::CanCUDNNBeUsed(ctx)) {
+      return framework::OpKernelType(input_data_type,
+                                     ctx.GetPlace(),
+                                     layout_,
+                                     framework::LibraryType::kCUDNN);
+    }
+#endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_);
   }
 };
diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc
index bc11f53e009353092d353babcfa3725544b82276..42e0e5250e0841794241785b4648466c4f32b359 100644
--- a/paddle/fluid/operators/softmax_op.cc
+++ b/paddle/fluid/operators/softmax_op.cc
@@ -48,6 +48,14 @@ class SoftmaxOp : public framework::OperatorWithKernel {
           platform::errors::InvalidArgument(
               "float16 can only be used on GPU/NPU/XPU/MLU and custom place"));
     }
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    if (platform::CanCUDNNBeUsed(ctx)) {
+      return framework::OpKernelType(input_data_type,
+                                     ctx.GetPlace(),
+                                     layout_,
+                                     framework::LibraryType::kCUDNN);
+    }
+#endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_);
   }
 };
@@ -132,6 +140,14 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
       PADDLE_THROW(platform::errors::InvalidArgument(
          "float16 can only be used on GPU/NPU/XPU/MLU and custom place"));
     }
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    if (platform::CanCUDNNBeUsed(ctx)) {
+      return framework::OpKernelType(input_data_type,
+                                     ctx.GetPlace(),
+                                     layout_,
+                                     framework::LibraryType::kCUDNN);
+    }
+#endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_);
   }
 };
diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h
index 595f47d98e56b1db5b6bcbb14a3dda82671e6616..4fa25476336f67f52c0b1c4d490bbe744a54e9fe 100644
--- a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h
+++ b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h
@@ -617,8 +617,8 @@ class ScopedActivationDescriptor {
 };
 
 inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
-  bool use_cudnn = paddle::platform::is_gpu_place(ctx.GetPlace()) &&
-                   ctx.HasAttr("use_cudnn") && ctx.Attr<bool>("use_cudnn");
+  bool use_cudnn = ctx.HasAttr("use_cudnn") && ctx.Attr<bool>("use_cudnn");
+  use_cudnn &= paddle::platform::is_gpu_place(ctx.GetPlace());
 #ifdef PADDLE_WITH_CUDA
   if (use_cudnn) {
     auto& dev_ctx = ctx.device_context<phi::GPUContext>();
diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h b/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h
index 019fdce9e044b1579436788de7efa832f2b3481f..0c9d6d24cd1bfa7ae101f20c4bb19cf936dbf8be 100644
--- a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h
+++ b/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h
@@ -554,8 +554,8 @@ class ScopedActivationDescriptor {
 };
 
 inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
-  bool use_cudnn = paddle::platform::is_gpu_place(ctx.GetPlace()) &&
-                   ctx.HasAttr("use_cudnn") && ctx.Attr<bool>("use_cudnn");
+  bool use_cudnn = ctx.HasAttr("use_cudnn") && ctx.Attr<bool>("use_cudnn");
+  use_cudnn &= paddle::platform::is_gpu_place(ctx.GetPlace());
 #ifdef PADDLE_WITH_HIP
   if (use_cudnn) {
    auto& dev_ctx = ctx.device_context<phi::GPUContext>();
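
Every per-operator hunk above applies the same pattern: GetExpectedKernelType first asks platform::CanCUDNNBeUsed(ctx) whether the cuDNN (or MIOpen) kernel is eligible, and only then constructs the OpKernelType, so the library choice is made op-by-op rather than in the shared framework paths that the earlier hunks strip out. The sketch below is a minimal, self-contained illustration of that dispatch decision; ExecutionContext, LibraryType, and GetExpectedLibrary here are simplified stand-ins rather than Paddle's real framework types, and only the control flow mirrors the patch.

    // Minimal sketch of the kernel-selection pattern restored by this patch.
    // The types are stand-ins; only the control flow mirrors the
    // GetExpectedKernelType overrides and CanCUDNNBeUsed above.
    #include <iostream>
    #include <map>
    #include <string>

    enum class LibraryType { kPlain, kCUDNN };

    // Stand-in for framework::ExecutionContext: a place plus op attributes.
    struct ExecutionContext {
      bool is_gpu_place = false;
      std::map<std::string, bool> attrs;
      bool HasAttr(const std::string& n) const { return attrs.count(n) > 0; }
      bool Attr(const std::string& n) const { return attrs.at(n); }
    };

    // Mirrors CanCUDNNBeUsed after this patch: the attribute check runs
    // first, then the result is masked by the place check.
    bool CanCUDNNBeUsed(const ExecutionContext& ctx) {
      bool use_cudnn = ctx.HasAttr("use_cudnn") && ctx.Attr("use_cudnn");
      use_cudnn &= ctx.is_gpu_place;
      return use_cudnn;
    }

    // Mirrors the per-op GetExpectedKernelType overrides: prefer the cuDNN
    // library when it can be used, otherwise fall back to the plain kernel.
    LibraryType GetExpectedLibrary(const ExecutionContext& ctx) {
      if (CanCUDNNBeUsed(ctx)) {
        return LibraryType::kCUDNN;
      }
      return LibraryType::kPlain;
    }

    int main() {
      ExecutionContext gpu_ctx{true, {{"use_cudnn", true}}};
      ExecutionContext cpu_ctx{false, {{"use_cudnn", true}}};
      // Prints "11": cuDNN on the GPU place, plain on the CPU place.
      std::cout << (GetExpectedLibrary(gpu_ctx) == LibraryType::kCUDNN)
                << (GetExpectedLibrary(cpu_ctx) == LibraryType::kPlain)
                << "\n";
    }

Note that the rewritten CanCUDNNBeUsed in the last two hunks is boolean-equivalent to the old form; it only reorders the checks so the use_cudnn attribute lookup happens before, rather than behind, the short-circuited is_gpu_place test.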