Unverified commit ca8e21a6, authored by Zhang Ting, committed by GitHub

polish the amp code (#51020)

Parent 871d2d36
@@ -122,90 +122,42 @@ inline paddle::experimental::DataType GetAmpDestDtype(
     const std::string& op_name,
     const paddle::small_vector<std::vector<paddle::Tensor>,
                                kSlotSmallVectorSize>& amp_tensors_vector) {
-  auto amp_dtype =
-      egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype();
   auto amp_level = egr::Controller::Instance().GetAMPLevel();
-  VLOG(6) << "AMP GetAmpDestDtype:"
-          << " op(" << op_name << ") amp_dtype(" << amp_dtype << ") amp_level("
-          << static_cast<int>(amp_level) << ").";
-  auto return_amp_type = paddle::experimental::DataType::FLOAT16;
-  if (amp_dtype == "float16") {
-    if (amp_level == paddle::imperative::AmpLevel::O1) {
-      if (paddle::imperative::AmpOperators::Instance()
-              .GetMutableAllowOps()
-              ->count(op_name)) {
-        return_amp_type = paddle::experimental::DataType::FLOAT16;
-      } else if (paddle::imperative::AmpOperators::Instance()
-                     .GetMutableBlockOps()
-                     ->count(op_name) ||
-                 paddle::imperative::AmpOperators::Instance()
-                     .GetMutableUnsupportedFp16Ops()
-                     ->count(op_name)) {
-        return_amp_type = paddle::experimental::DataType::FLOAT32;
-      } else {
-        auto dst_type = GetPromoteType(op_name,
-                                       amp_tensors_vector,
-                                       paddle::experimental::DataType::FLOAT16);
-        if (dst_type == paddle::experimental::DataType::FLOAT16 &&
-            paddle::imperative::AmpOperators::Instance()
-                .GetMutableUnsupportedFp16Ops()
-                ->count(op_name)) {
-          dst_type = paddle::experimental::DataType::FLOAT32;
-        }
-        return_amp_type = dst_type;
-      }
-    } else if (amp_level == paddle::imperative::AmpLevel::O2) {
-      auto dst_type = paddle::experimental::DataType::FLOAT16;
-      if (paddle::imperative::AmpOperators::Instance()
-              .GetMutableUnsupportedFp16Ops()
-              ->count(op_name) ||
-          paddle::imperative::AmpOperators::Instance()
-              .GetMutableBlockOps()
-              ->count(op_name)) {
-        dst_type = paddle::experimental::DataType::FLOAT32;
-      }
-      return_amp_type = dst_type;
-    }
-  } else if (amp_dtype == "bfloat16") {
-    if (amp_level == paddle::imperative::AmpLevel::O1) {
-      if (paddle::imperative::AmpOperators::Instance()
-              .GetMutableAllowOps()
-              ->count(op_name)) {
-        return_amp_type = paddle::experimental::DataType::BFLOAT16;
-      } else if (paddle::imperative::AmpOperators::Instance()
-                     .GetMutableBlockOps()
-                     ->count(op_name)) {
-        return_amp_type = paddle::experimental::DataType::FLOAT32;
-      } else {
-        auto dst_type =
-            GetPromoteType(op_name,
-                           amp_tensors_vector,
-                           paddle::experimental::DataType::BFLOAT16);
-        if (dst_type == paddle::experimental::DataType::BFLOAT16 &&
-            paddle::imperative::AmpOperators::Instance()
-                .GetMutableUnsupportedBf16Ops()
-                ->count(op_name)) {
-          dst_type = paddle::experimental::DataType::FLOAT32;
-        }
-        return_amp_type = dst_type;
-      }
-    } else if (amp_level == paddle::imperative::AmpLevel::O2) {
-      auto dst_type = paddle::experimental::DataType::BFLOAT16;
-      if (paddle::imperative::AmpOperators::Instance()
-              .GetMutableUnsupportedBf16Ops()
-              ->count(op_name) ||
-          paddle::imperative::AmpOperators::Instance()
-              .GetMutableBlockOps()
-              ->count(op_name)) {
-        dst_type = paddle::experimental::DataType::FLOAT32;
-      }
-      return_amp_type = dst_type;
-    }
-  } else {
-    return_amp_type = paddle::experimental::DataType::FLOAT32;
-  }
-  return GetDtypeWithPlace(op_name, amp_tensors_vector, return_amp_type);
+  auto amp_setting_dtype =
+      egr::Controller::Instance().GetCurrentTracer()->GetAmpPhiDtype();
+  auto dst_type = amp_setting_dtype;
+  if (amp_level == paddle::imperative::AmpLevel::O1) {
+    if (paddle::imperative::AmpOperators::Instance()
+            .GetMutableAllowOps()
+            ->count(op_name)) {
+      dst_type = amp_setting_dtype;
+    } else if (paddle::imperative::AmpOperators::Instance()
+                   .GetMutableBlockOps()
+                   ->count(op_name)) {
+      dst_type = paddle::experimental::DataType::FLOAT32;
+    } else {
+      dst_type =
+          GetPromoteType(op_name, amp_tensors_vector, amp_setting_dtype);
+    }
+  } else if (amp_level == paddle::imperative::AmpLevel::O2) {
+    if (paddle::imperative::AmpOperators::Instance()
+            .GetMutableBlockOps()
+            ->count(op_name)) {
+      dst_type = paddle::experimental::DataType::FLOAT32;
+    }
+  }
+
+  if (dst_type == amp_setting_dtype &&
+      (paddle::imperative::AmpOperators::Instance()
+           .GetMutableUnsupportedOps(amp_setting_dtype)
+           ->count(op_name))) {
+    dst_type = paddle::experimental::DataType::FLOAT32;
+  }
+
+  dst_type = GetDtypeWithPlace(op_name, amp_tensors_vector, dst_type);
+  VLOG(6) << "AMP GetAmpDestDtype:"
+          << " op(" << op_name << ") amp_dtype(" << dst_type << ") amp_level("
+          << static_cast<int>(amp_level) << ").";
+  return dst_type;
 }
 }  // namespace egr
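
The rewritten GetAmpDestDtype above collapses the separate float16 and bfloat16 branches into one path: the target dtype now comes from the tracer via GetAmpPhiDtype(), O1 consults the allow/block op lists and otherwise promotes based on the input tensors, O2 only honours the block list, and a final shared check sends ops in the per-dtype unsupported set back to float32. The standalone sketch below mirrors that selection policy for illustration only; the names, the boolean stand-in for GetPromoteType, and the example op sets are assumptions, not Paddle's actual API or default lists.

#include <iostream>
#include <string>
#include <unordered_set>

// Illustrative sketch only (not Paddle code): the real implementation reads
// the allow/block/unsupported sets from AmpOperators and the level/dtype from
// the tracer; here they are plain parameters and the promote step is reduced
// to a boolean.
enum class DType { FLOAT32, FLOAT16, BFLOAT16 };
enum class AmpLevel { O0, O1, O2 };

DType SelectAmpDstDtype(const std::string& op_name,
                        DType amp_setting_dtype,
                        AmpLevel amp_level,
                        const std::unordered_set<std::string>& allow_ops,
                        const std::unordered_set<std::string>& block_ops,
                        const std::unordered_set<std::string>& unsupported_ops,
                        bool any_input_is_fp32 /* stand-in for GetPromoteType */) {
  DType dst_type = amp_setting_dtype;
  if (amp_level == AmpLevel::O1) {
    if (allow_ops.count(op_name)) {
      dst_type = amp_setting_dtype;  // explicitly allowed: keep low precision
    } else if (block_ops.count(op_name)) {
      dst_type = DType::FLOAT32;     // explicitly blocked: stay in fp32
    } else {
      // Promotion: if any input is already fp32, run the op in fp32.
      dst_type = any_input_is_fp32 ? DType::FLOAT32 : amp_setting_dtype;
    }
  } else if (amp_level == AmpLevel::O2) {
    if (block_ops.count(op_name)) {
      dst_type = DType::FLOAT32;     // O2 only honours the block list
    }
  }
  // Shared fallback: ops unsupported in the low-precision dtype go to fp32.
  if (dst_type == amp_setting_dtype && unsupported_ops.count(op_name)) {
    dst_type = DType::FLOAT32;
  }
  return dst_type;
}

int main() {
  std::unordered_set<std::string> allow{"example_matmul"};
  std::unordered_set<std::string> block{"example_softmax"};
  std::unordered_set<std::string> unsupported{"example_lookup"};
  // Allowed op keeps fp16 at O1; unsupported op falls back to fp32 at O2.
  std::cout << (SelectAmpDstDtype("example_matmul", DType::FLOAT16,
                                  AmpLevel::O1, allow, block, unsupported,
                                  false) == DType::FLOAT16)
            << " "
            << (SelectAmpDstDtype("example_lookup", DType::FLOAT16,
                                  AmpLevel::O2, allow, block, unsupported,
                                  false) == DType::FLOAT32)
            << std::endl;  // prints "1 1"
  return 0;
}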
@@ -200,6 +200,22 @@ AmpOperators::GetMutableBlockOps() {
   return block_ops_;
 }
 
+std::shared_ptr<std::unordered_set<std::string>>
+AmpOperators::GetMutableUnsupportedOps(
+    const paddle::experimental::DataType& data_type) {
+  PADDLE_ENFORCE_EQ(
+      data_type == paddle::experimental::DataType::FLOAT16 ||
+          data_type == paddle::experimental::DataType::BFLOAT16,
+      true,
+      phi::errors::InvalidArgument(
+          "The data_type mismatch. It should be FLOAT16 or BFLOAT16."));
+  if (data_type == paddle::experimental::DataType::FLOAT16) {
+    return unsupported_fp16_ops_;
+  } else {
+    return unsupported_bf16_ops_;
+  }
+}
+
 std::shared_ptr<std::unordered_set<std::string>>
 AmpOperators::GetMutableUnsupportedFp16Ops() {
   return unsupported_fp16_ops_;
......
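
As a usage illustration (not part of this patch): call sites that already hold the AMP dtype can pick the matching unsupported-op set through the new accessor instead of branching between GetMutableUnsupportedFp16Ops() and GetMutableUnsupportedBf16Ops(). The header path and the wrapper function below are assumptions made for the sketch.

// Hedged usage sketch, not from the patch; assumes AmpOperators is declared
// in the usual imperative AMP header.
#include <string>
#include "paddle/fluid/imperative/amp_auto_cast.h"

void MarkUnsupportedForBf16(const std::string& op_name) {
  // One dtype-keyed accessor replaces the per-dtype getters at this call site.
  paddle::imperative::AmpOperators::Instance()
      .GetMutableUnsupportedOps(paddle::experimental::DataType::BFLOAT16)
      ->insert(op_name);
}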
@@ -54,6 +54,9 @@ class AmpOperators {
   std::shared_ptr<std::unordered_set<std::string>> GetMutableBlockOps();
 
+  std::shared_ptr<std::unordered_set<std::string>> GetMutableUnsupportedOps(
+      const paddle::experimental::DataType& data_type);
+
   std::shared_ptr<std::unordered_set<std::string>>
   GetMutableUnsupportedFp16Ops();
......
@@ -184,6 +184,8 @@ class Tracer {
     }
   }
 
+  phi::DataType GetAmpPhiDtype() const { return amp_dtype_; }
+
   void DisableLayoutAutoTune() { use_layout_autotune_ = false; }
 
   void EnableLayoutAutoTune() { use_layout_autotune_ = true; }
......