diff --git a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
index 3d66ed788c6a944dfb2afd083513349a11d89c48..fd2c8a024c26a6ba102fdf565a96249f01140744 100644
--- a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
+++ b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
@@ -22,6 +22,9 @@
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/errors.h"
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+#include "paddle/phi/backends/device_manager.h"
+#endif

 namespace paddle {
 namespace framework {
@@ -44,16 +47,19 @@ bool PhiKernelSupportPrecision(
   return phi::KernelFactory::Instance().HasKernel(op_type, kernel_key);
 }

-bool GpuKernelSupportPrecision(
+bool KernelSupportPrecision(
     const std::string& op_type,
+    phi::Backend backend,
     phi::DataType precision,
     phi::DataLayout layout = phi::DataLayout::ALL_LAYOUT) {
   auto phi_op_type = phi::TransToPhiKernelName(op_type);
-  bool support = PhiKernelSupportPrecision(
-      phi_op_type, phi::Backend::GPU, precision, layout);
-  support |= PhiKernelSupportPrecision(
-      phi_op_type, phi::Backend::GPUDNN, precision, layout);
+  bool support =
+      PhiKernelSupportPrecision(phi_op_type, backend, precision, layout);
+  if (backend == phi::Backend::GPU) {
+    support |= PhiKernelSupportPrecision(
+        phi_op_type, phi::Backend::GPUDNN, precision, layout);
+  }
   if (!support) {
     const auto& all_kernels = framework::OperatorWithKernel::AllOpKernels();
     auto it = all_kernels.find(op_type);
@@ -146,11 +152,15 @@ bool OpSupportPrecision(const std::string& op_type,
                         const std::unordered_set<std::string>& black_list) {
   bool support = false;
   if (black_list.count(op_type) == 0) {
-    if (backend == phi::Backend::GPU) {
-      support = GpuKernelSupportPrecision(op_type, precision);
+    // Real custom backends are registered after NUM_BACKENDS, so an enum
+    // value beyond NUM_BACKENDS identifies a custom device.
+    if (backend == phi::Backend::GPU ||
+        static_cast<size_t>(backend) >
+            static_cast<size_t>(phi::Backend::NUM_BACKENDS)) {
+      support = KernelSupportPrecision(op_type, backend, precision);
     } else {
       PADDLE_THROW(paddle::platform::errors::InvalidArgument(
-          "Now, only support backend of GPU."));
+          "Now, only the GPU and custom device backends are supported."));
     }
   }
   return support;
 }
@@ -183,11 +193,28 @@ void AutoMixedPrecisionPass::SetDefaultBlacklist() const {

 void AutoMixedPrecisionPass::Init(Graph* graph) const {
   bool enable_gpu_mixed = Get<bool>("enable_gpu_mixed");
+  bool enable_custom_device_mixed = false;
+  if (Has("enable_custom_device_mixed")) {
+    enable_custom_device_mixed = Get<bool>("enable_custom_device_mixed");
+  }
   if (enable_gpu_mixed) {
     backend_ = phi::Backend::GPU;
+  } else if (enable_custom_device_mixed) {
+// Transform Backend::CUSTOM into the actual registered backend.
+// Here, only one custom backend is considered.
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+    auto device_type = phi::DeviceManager::GetAllCustomDeviceTypes()[0];
+    backend_ = static_cast<phi::Backend>(
+        static_cast<size_t>(phi::Backend::NUM_BACKENDS) +
+        phi::CustomRegisteredDeviceMap::Instance()
+            .GetOrRegisterGlobalDeviceTypeId(device_type));
+#else
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "Paddle is not compiled with CustomDevice. "
+        "Cannot enable custom_device_mixed."));
+#endif
   }
-
-  skip_pass_ = !enable_gpu_mixed;
+  skip_pass_ = !enable_gpu_mixed && !enable_custom_device_mixed;

   low_precision_ =
       static_cast<phi::DataType>(Get<int>("mixed_precision_mode"));
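The check added to `OpSupportPrecision` leans on the encoding that `Init` sets up: a registered custom device is mapped to `NUM_BACKENDS + device_type_id`, so any enum value strictly greater than `NUM_BACKENDS` must be a custom backend. Below is a minimal standalone sketch of that scheme, with illustrative enum values and a hypothetical `device_type_id`; it is not the real `phi::Backend`:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>

// Illustrative enum mirroring the layout of phi::Backend: named backends
// first, NUM_BACKENDS as a sentinel, custom devices encoded past it.
enum class Backend : uint8_t {
  UNDEFINED = 0, CPU, GPU, GPUDNN, KPS, CUSTOM, NUM_BACKENDS
};

// A custom device registered with id N is encoded as NUM_BACKENDS + N.
Backend EncodeCustomBackend(size_t device_type_id) {
  return static_cast<Backend>(static_cast<size_t>(Backend::NUM_BACKENDS) +
                              device_type_id);
}

// Mirrors the check added to OpSupportPrecision: values strictly greater
// than NUM_BACKENDS can only come from the custom-device registry.
bool IsCustomBackend(Backend backend) {
  return static_cast<size_t>(backend) >
         static_cast<size_t>(Backend::NUM_BACKENDS);
}

int main() {
  // Suppose the first registered custom device type received id 1
  // (hypothetical; in Paddle the id comes from
  // GetOrRegisterGlobalDeviceTypeId).
  Backend custom = EncodeCustomBackend(1);
  std::cout << std::boolalpha << IsCustomBackend(custom) << '\n';        // true
  std::cout << std::boolalpha << IsCustomBackend(Backend::GPU) << '\n';  // false
}
```

Since `Backend` is backed by `uint8_t`, this layout leaves room for up to `255 - NUM_BACKENDS` registered custom device types.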
@@ -466,8 +493,8 @@ void AutoMixedPrecisionPass::UpdateOpPrecision() const {
       // when op_1 only support cpu kernel. if op_2's intput var is op_1's
       // output var, then op_2 should not run at low precision.
       if (GetOpOriginalType(op_type) != "feed" &&
-          !GpuKernelSupportPrecision(GetOpOriginalType(op_type),
-                                     phi::DataType::FLOAT32)) {
+          !KernelSupportPrecision(
+              GetOpOriginalType(op_type), backend_, phi::DataType::FLOAT32)) {
         for (auto* out_var_node : op_node->outputs) {
           CHECK_EQ(out_var_node->IsVar(), true);
           if (out_var_node->Var()->Persistable()) continue;
diff --git a/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc b/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc
index f1939fc8b328b8229b24447bae118254c55ea18c..e703b7d5bf630589ab5b689a866ea0d0eae3d89c 100644
--- a/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc
+++ b/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc
@@ -48,10 +48,10 @@ ConvertToMixedPrecisionPass::ConvertToMixedPrecisionPass(
         "support fp16 and bf16.",
         static_cast<int>(mixed_precision_)));
   }
-  if (backend_ != phi::Backend::GPU) {
+  if (backend_ != phi::Backend::GPU && backend_ != phi::Backend::CUSTOM) {
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
         "mixed_precision currently not supported place %d, we now only "
-        "support gpu.",
+        "support gpu and custom device.",
         static_cast<int>(backend_)));
   }
 }
@@ -72,7 +72,13 @@ void ConvertToMixedPrecisionPass::Run() {
   pass.Set("mixed_precision_mode",
            new int{static_cast<int>(mixed_precision_)});
   pass.Set("mixed_black_list",
            new std::unordered_set<std::string>{black_list_});
-  pass.Set("enable_gpu_mixed", new bool{true});
+  if (backend_ == phi::Backend::GPU) {
+    pass.Set("enable_gpu_mixed", new bool{true});
+    pass.Set("enable_custom_device_mixed", new bool{false});
+  } else if (backend_ == phi::Backend::CUSTOM) {
+    pass.Set("enable_gpu_mixed", new bool{false});
+    pass.Set("enable_custom_device_mixed", new bool{true});
+  }
   pass.Set("keep_io_types", new bool{keep_io_types_});
   pass.Apply(main_graph_.get());
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 8083cf85e0d6afb9f3e80bc84d5dde8e1b67dd1a..5604e913fd01234f4ad578739dec6a21f8966935 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -146,6 +146,8 @@ phi::Backend ConvertBackend(paddle_infer::PlaceType backend) {
       return phi::Backend::CPU;
     case paddle_infer::PlaceType::kIPU:
       return phi::Backend::IPU;
+    case paddle_infer::PlaceType::kCUSTOM:
+      return phi::Backend::CUSTOM;
     default:
       PADDLE_THROW(paddle::platform::errors::InvalidArgument(
           "Paddle Inference not support backend, we now only support GPU, XPU, "
diff --git a/paddle/phi/common/backend.h b/paddle/phi/common/backend.h
index cd9c24436da97eadd772100e99f71da441317d9e..4e7ec83c427bae8e8d436cc6b2502aaa10ae624b 100644
--- a/paddle/phi/common/backend.h
+++ b/paddle/phi/common/backend.h
@@ -59,6 +59,9 @@ enum class Backend : uint8_t {
   // paddle kernel primitives backend
   KPS,

+  // custom device reference
+  CUSTOM,
+
   // end of backend types
   NUM_BACKENDS,

@@ -207,7 +210,7 @@ inline std::string BackendToString(const Backend& backend) {
       return "KPS";
     case Backend::IPU:
       return "IPU";
-    default:
+    default: {
       size_t device_type_id_ = static_cast<size_t>(backend) -
                                static_cast<size_t>(Backend::NUM_BACKENDS);
       std::string device_type =
@@ -219,6 +222,7 @@ inline std::string BackendToString(const Backend& backend) {
        PD_THROW(
            "Invalid enum backend type `", static_cast<uint8_t>(backend), "`.");
      }
+    }
   }
 }
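With `PlaceType::kCUSTOM` now routed through `ConvertBackend`, the offline conversion entry point can target a plugin device from C++ as well. A hedged sketch, assuming the `paddle_infer::ConvertToMixedPrecision` signature that the `ConvertBackend` helper above serves; the model paths are hypothetical:

```cpp
// Offline conversion targeting a plugin device (sketch, not verified
// against every Paddle release).
#include "paddle_inference_api.h"

int main() {
  paddle_infer::ConvertToMixedPrecision(
      "resnet50/inference.pdmodel",    // source model (hypothetical path)
      "resnet50/inference.pdiparams",  // source params (hypothetical path)
      "mixed/inference.pdmodel",       // converted model
      "mixed/inference.pdiparams",     // converted params
      paddle_infer::PrecisionType::kHalf,
      paddle_infer::PlaceType::kCUSTOM);  // mapped to phi::Backend::CUSTOM
  return 0;
}
```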
diff --git a/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py b/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py
index 80b91ca8f01338031ed0cd70760c461c1740c9a0..9bf1ff43a26e96d5463faabad8818f1691b30346 100755
--- a/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py
+++ b/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py
@@ -258,6 +258,41 @@ class TestCustomCPUPlugin(unittest.TestCase):
         avg_loss.backward()
         sgd.step()

+    def _test_custom_device_mix_precision(self):
+        import tempfile
+
+        import paddle
+        from paddle.inference import (
+            PlaceType,
+            PrecisionType,
+            convert_to_mixed_precision,
+        )
+        from paddle.jit import to_static
+        from paddle.static import InputSpec
+        from paddle.vision.models import resnet50
+
+        self.temp_dir = tempfile.TemporaryDirectory()
+        model = resnet50(True)
+        net = to_static(
+            model, input_spec=[InputSpec(shape=[None, 3, 224, 224], name='x')]
+        )
+        paddle.jit.save(
+            net, os.path.join(self.temp_dir.name, 'resnet50/inference')
+        )
+        convert_to_mixed_precision(
+            os.path.join(self.temp_dir.name, 'resnet50/inference.pdmodel'),
+            os.path.join(self.temp_dir.name, 'resnet50/inference.pdiparams'),
+            os.path.join(
+                self.temp_dir.name, 'mixed_precision/inference.pdmodel'
+            ),
+            os.path.join(
+                self.temp_dir.name, 'mixed_precision/inference.pdiparams'
+            ),
+            backend=PlaceType.CUSTOM,
+            mixed_precision=PrecisionType.Half,
+        )
+        self.temp_dir.cleanup()
+
     def _test_custom_device_py_api(self):
         import paddle
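For completeness, here is a sketch of consuming the converted model on the plugin device. It assumes `Config::EnableCustomDevice` is available in a build with `PADDLE_WITH_CUSTOM_DEVICE`, that the `custom_cpu` plugin exercised by the test above is registered, and that the paths are the hypothetical outputs of the conversion step:

```cpp
// Loading the mixed-precision model on a custom device (sketch).
#include <memory>

#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  // Output of the convert_to_mixed_precision step above (hypothetical paths).
  config.SetModel("mixed_precision/inference.pdmodel",
                  "mixed_precision/inference.pdiparams");
  // Place the whole graph on the registered plugin device.
  config.EnableCustomDevice("custom_cpu", /*device_id=*/0);
  auto predictor = paddle_infer::CreatePredictor(config);
  // ... feed inputs via predictor->GetInputHandle(name), then predictor->Run().
  return 0;
}
```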