diff --git a/paddle/fluid/eager/amp_auto_cast.h b/paddle/fluid/eager/amp_auto_cast.h index 5110f6f883e67251d0194087dcdd62d5de49e7ec..a16dd95396427a4a866a7155fc4955668fa71ecc 100644 --- a/paddle/fluid/eager/amp_auto_cast.h +++ b/paddle/fluid/eager/amp_auto_cast.h @@ -29,7 +29,8 @@ static inline bool NeedCast(const paddle::experimental::Tensor& tensor, paddle::platform::is_xpu_place(place) || paddle::platform::is_mlu_place(place) || paddle::platform::is_npu_place(place) || - paddle::platform::is_npu_pinned_place(place)) { + paddle::platform::is_npu_pinned_place(place) || + paddle::platform::is_custom_place(place)) { // CudaPinndePlace is added for varbase created by dataloader if ((data_type == paddle::experimental::DataType::FLOAT32 || data_type == paddle::experimental::DataType::FLOAT16 || diff --git a/paddle/fluid/eager/eager_amp_auto_cast.h b/paddle/fluid/eager/eager_amp_auto_cast.h index 42961b84bcdb0286c46ff76097642af47ae5c34d..22748e31cfd7abcd4fcf12d5a55cf28bf7678de8 100644 --- a/paddle/fluid/eager/eager_amp_auto_cast.h +++ b/paddle/fluid/eager/eager_amp_auto_cast.h @@ -27,7 +27,8 @@ static inline bool NeedCast(const paddle::experimental::Tensor& tensor, paddle::platform::is_xpu_place(place) || paddle::platform::is_mlu_place(place) || paddle::platform::is_npu_place(place) || - paddle::platform::is_npu_pinned_place(place)) { + paddle::platform::is_npu_pinned_place(place) || + paddle::platform::is_custom_place(place)) { // CudaPinndePlace is added for varbase created by dataloader if ((data_type == paddle::experimental::DataType::FLOAT32 || data_type == paddle::experimental::DataType::FLOAT16 || diff --git a/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_to_static.py b/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_to_static.py index a1b485b3f5ce04073b8b4b645d27cdecb70074d0..6ce9cb45761ec4f86cee163e78244b7903f92dbc 100644 --- a/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_to_static.py +++ b/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_to_static.py @@ -54,7 +54,11 @@ def train_func_ampo1(epoch_id, train_loader, model, cost, optimizer, scaler): for batch_id, (images, labels) in enumerate(train_loader()): # forward with paddle.amp.auto_cast( - custom_black_list={"flatten_contiguous_range", "greater_than"}, + custom_black_list={ + "flatten_contiguous_range", + "greater_than", + "matmul_v2", + }, level='O1', ): outputs = model(images)