未验证 提交 9fd2c546 编写于 作者: L Liu-xiandong 提交者: GitHub

[KP] fix bug when phi kernel is *_raw (#42113)

* [KP] fix bug when phi kernel is *_raw

* modify the static graph

* delete useless comment

* delete the phi multiply kernel case

* add VLOG(3) message

* add VLOG(3) message

* fix static graph error in phi

* fix bug in transform model

* modify the comment

* delete useless code

* fix CI bug

* fix CI bug
上级 62c0304b
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -1281,6 +1278,12 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -1281,6 +1278,12 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
dev_ctx = pool.Get(kernel_type_->place_); dev_ctx = pool.Get(kernel_type_->place_);
} }
// TODO(Liu-xiandong): Now we are using too much if-else and hard code in XPU
// device, it's ugly, and we will refactor in the future.
#if defined(PADDLE_WITH_XPU_KP)
bool use_phi_xpu_kp = false;
#endif
// TODO(chenweihang): Now we are still reusing a lot of the original fluid // TODO(chenweihang): Now we are still reusing a lot of the original fluid
// implementation, this is a gradual replacement process // implementation, this is a gradual replacement process
// TODO(chenweihang): in the first phase of project, we only support CPU, CUDA // TODO(chenweihang): in the first phase of project, we only support CPU, CUDA
...@@ -1299,6 +1302,45 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -1299,6 +1302,45 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
dev_ctx = pool.Get(kernel_type_->place_); dev_ctx = pool.Get(kernel_type_->place_);
pt_kernel_name = kernel_signature_->name; pt_kernel_name = kernel_signature_->name;
// NOTE(Liu-xiandong): The register kernel used KP have library_type[KP],
// But the default library_type is Plain, so we need to modify the
// library_type here, otherwise it can't work.
#ifdef PADDLE_WITH_XPU_KP
if (paddle::platform::is_xpu_place(kernel_type_->place_)) {
bool use_xpu_kp_kernel_rt =
FLAGS_run_kp_kernel &&
paddle::platform::is_xpu_kp_support_op(type_, *kernel_type_);
bool use_xpu_kp_kernel_debug =
paddle::platform::is_in_xpu_kpwhite_list(type_);
if (use_xpu_kp_kernel_rt) {
VLOG(3) << "phi xpu_kp using rt mode in static graph";
}
if (use_xpu_kp_kernel_debug) {
VLOG(3) << "phi xpu_kp using debug mode in static graph";
}
bool is_xpu_kp_support =
(use_xpu_kp_kernel_rt || use_xpu_kp_kernel_debug);
if (is_xpu_kp_support) {
auto expected_kernel_key_library_type = kernel_type_->library_type_;
kernel_type_->library_type_ = LibraryType::kKP;
VLOG(3) << "modifing XPU KP kernel in static graph: "
<< pt_kernel_name
<< ", using_kernel_key:" << *kernel_type_.get();
auto try_pt_kernel_key =
TransOpKernelTypeToPhiKernelKey(*kernel_type_.get());
if (!phi::KernelFactory::Instance().HasKernel(pt_kernel_name,
try_pt_kernel_key)) {
kernel_type_->library_type_ = expected_kernel_key_library_type;
VLOG(3) << "modify XPU KP kernel in static graph: "
<< pt_kernel_name << " is failed " << *kernel_type_.get();
} else {
use_phi_xpu_kp = true;
VLOG(3) << "modify XPU KP kernel in static graph: "
<< pt_kernel_name << " is succeed " << *kernel_type_.get();
}
}
}
#endif
pt_kernel_key = TransOpKernelTypeToPhiKernelKey(*kernel_type_.get()); pt_kernel_key = TransOpKernelTypeToPhiKernelKey(*kernel_type_.get());
pt_kernel_.reset( pt_kernel_.reset(
new phi::Kernel(phi::KernelFactory::Instance().SelectKernel( new phi::Kernel(phi::KernelFactory::Instance().SelectKernel(
...@@ -1314,9 +1356,9 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -1314,9 +1356,9 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
} }
} else { } else {
pt_kernel_name = kernel_signature_->name; pt_kernel_name = kernel_signature_->name;
// NOTE(Liu-xiandong): The register kernel used KP have library_type[KP], // NOTE(Liu-xiandong):In my ctest, this branch do not be executed,
// But the default library_type is Plain, so we need to modify the // I can't understand it, it's really confusing.
// library_type here, otherwise it can't work. // But we still need to keep this to avoid errors.
#ifdef PADDLE_WITH_XPU_KP #ifdef PADDLE_WITH_XPU_KP
if (paddle::platform::is_xpu_place(kernel_type_->place_)) { if (paddle::platform::is_xpu_place(kernel_type_->place_)) {
bool use_xpu_kp_kernel_rt = bool use_xpu_kp_kernel_rt =
...@@ -1335,15 +1377,20 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -1335,15 +1377,20 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
if (is_xpu_kp_support) { if (is_xpu_kp_support) {
auto expected_kernel_key_library_type = kernel_type_->library_type_; auto expected_kernel_key_library_type = kernel_type_->library_type_;
kernel_type_->library_type_ = LibraryType::kKP; kernel_type_->library_type_ = LibraryType::kKP;
VLOG(3) << "modifing XPU KP kernel in static graph: " << type_ VLOG(3) << "modifing XPU KP kernel in static graph: "
<< pt_kernel_name
<< ", using_kernel_key:" << *kernel_type_.get(); << ", using_kernel_key:" << *kernel_type_.get();
auto try_pt_kernel_key = auto try_pt_kernel_key =
TransOpKernelTypeToPhiKernelKey(*kernel_type_.get()); TransOpKernelTypeToPhiKernelKey(*kernel_type_.get());
if (!phi::KernelFactory::Instance().HasKernel(pt_kernel_name, if (!phi::KernelFactory::Instance().HasKernel(pt_kernel_name,
try_pt_kernel_key)) { try_pt_kernel_key)) {
kernel_type_->library_type_ = expected_kernel_key_library_type; kernel_type_->library_type_ = expected_kernel_key_library_type;
VLOG(3) << "modify XPU KP kernel in static graph: " << type_ VLOG(3) << "modify XPU KP kernel in static graph: "
<< " is failed " << *kernel_type_.get(); << pt_kernel_name << " is failed " << *kernel_type_.get();
} else {
use_phi_xpu_kp = true;
VLOG(3) << "modify XPU KP kernel in static graph: "
<< pt_kernel_name << " is succeed " << *kernel_type_.get();
} }
} }
} }
...@@ -1360,11 +1407,25 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -1360,11 +1407,25 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
!paddle::platform::is_xpu_support_op(type_, *kernel_type_.get()) || !paddle::platform::is_xpu_support_op(type_, *kernel_type_.get()) ||
paddle::platform::is_in_xpu_black_list(type_); paddle::platform::is_in_xpu_black_list(type_);
#endif #endif
#ifdef PADDLE_WITH_XPU_KP
bool use_xpu_kp_kernel_rt =
paddle::platform::is_xpu_place(kernel_type_->place_) &&
FLAGS_run_kp_kernel &&
paddle::platform::is_xpu_kp_support_op(type_, *kernel_type_);
bool use_xpu_kp_kernel_debug =
paddle::platform::is_xpu_place(kernel_type_->place_) &&
paddle::platform::is_in_xpu_kpwhite_list(type_);
bool is_xpu_kp_support = (use_xpu_kp_kernel_rt || use_xpu_kp_kernel_debug);
#endif
if (pt_kernel_->IsValid() if (pt_kernel_->IsValid()
#if defined(PADDLE_WITH_XPU) && !defined(PADDLE_WITH_XPU_KP) #if defined(PADDLE_WITH_XPU) && !defined(PADDLE_WITH_XPU_KP)
&& !is_xpu_unsupport && !is_xpu_unsupport
#endif #endif
) { #if defined(PADDLE_WITH_XPU_KP)
&& (!is_xpu_unsupport || use_phi_xpu_kp)
#endif
) {
run_phi_kernel_ = true; run_phi_kernel_ = true;
} else { } else {
auto& all_op_kernels = AllOpKernels(); auto& all_op_kernels = AllOpKernels();
...@@ -1374,15 +1435,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -1374,15 +1435,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
// we need to select the heterogeneous kernel in fluid, but the kernel // we need to select the heterogeneous kernel in fluid, but the kernel
// registered in KP use library_type[KP], we need to modify it. // registered in KP use library_type[KP], we need to modify it.
#ifdef PADDLE_WITH_XPU_KP #ifdef PADDLE_WITH_XPU_KP
bool use_xpu_kp_kernel_rt =
paddle::platform::is_xpu_place(kernel_type_->place_) &&
FLAGS_run_kp_kernel &&
paddle::platform::is_xpu_kp_support_op(type_, *kernel_type_);
bool use_xpu_kp_kernel_debug =
paddle::platform::is_xpu_place(kernel_type_->place_) &&
paddle::platform::is_in_xpu_kpwhite_list(type_);
bool is_xpu_kp_support =
(use_xpu_kp_kernel_rt || use_xpu_kp_kernel_debug);
if (is_xpu_kp_support) { if (is_xpu_kp_support) {
kernel_type_->library_type_ = LibraryType::kKP; kernel_type_->library_type_ = LibraryType::kKP;
} }
...@@ -1609,7 +1661,7 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const { ...@@ -1609,7 +1661,7 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const {
(kernel_iter == kernels.end() || (kernel_iter == kernels.end() ||
!paddle::platform::is_xpu_support_op(type_, expected_kernel_key) || !paddle::platform::is_xpu_support_op(type_, expected_kernel_key) ||
paddle::platform::is_in_xpu_black_list(type_))) { paddle::platform::is_in_xpu_black_list(type_))) {
VLOG(3) << "missing XPU kernel: " << type_ VLOG(3) << "fluid missing XPU kernel: " << type_
<< ", expected_kernel_key:" << expected_kernel_key << ", expected_kernel_key:" << expected_kernel_key
<< ", fallbacking to CPU one!"; << ", fallbacking to CPU one!";
expected_kernel_key.place_ = platform::CPUPlace(); expected_kernel_key.place_ = platform::CPUPlace();
...@@ -1625,10 +1677,10 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const { ...@@ -1625,10 +1677,10 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const {
bool use_xpu_kp_kernel_debug = bool use_xpu_kp_kernel_debug =
paddle::platform::is_in_xpu_kpwhite_list(type_); paddle::platform::is_in_xpu_kpwhite_list(type_);
if (use_xpu_kp_kernel_rt) { if (use_xpu_kp_kernel_rt) {
VLOG(3) << "xpu_kp using rt mode "; VLOG(3) << "fluid xpu_kp using rt mode ";
} }
if (use_xpu_kp_kernel_debug) { if (use_xpu_kp_kernel_debug) {
VLOG(3) << "xpu_kp using debug mode "; VLOG(3) << "fluid xpu_kp using debug mode ";
} }
bool is_xpu_kp_support = (use_xpu_kp_kernel_rt || use_xpu_kp_kernel_debug); bool is_xpu_kp_support = (use_xpu_kp_kernel_rt || use_xpu_kp_kernel_debug);
if (is_xpu_kp_support) { if (is_xpu_kp_support) {
...@@ -1645,7 +1697,7 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const { ...@@ -1645,7 +1697,7 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const {
expected_kernel_key.place_ = platform::CPUPlace(); expected_kernel_key.place_ = platform::CPUPlace();
kernel_iter = kernels.find(expected_kernel_key); kernel_iter = kernels.find(expected_kernel_key);
} else { } else {
VLOG(3) << "using XPU KP kernel: " << type_ VLOG(3) << "fluid using XPU KP kernel: " << type_
<< ", using_kernel_key:" << expected_kernel_key; << ", using_kernel_key:" << expected_kernel_key;
} }
} }
...@@ -1654,7 +1706,7 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const { ...@@ -1654,7 +1706,7 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const {
paddle::platform::is_in_xpu_black_list(type_)); paddle::platform::is_in_xpu_black_list(type_));
if (!is_xpu_kp_support && if (!is_xpu_kp_support &&
(kernel_iter == kernels.end() || is_xpu_unsupport)) { (kernel_iter == kernels.end() || is_xpu_unsupport)) {
VLOG(3) << "missing XPU kernel: " << type_ VLOG(3) << "fluid missing XPU kernel: " << type_
<< ", expected_kernel_key:" << expected_kernel_key << ", expected_kernel_key:" << expected_kernel_key
<< ", fallbacking to CPU one!"; << ", fallbacking to CPU one!";
expected_kernel_key.place_ = platform::CPUPlace(); expected_kernel_key.place_ = platform::CPUPlace();
......
...@@ -233,14 +233,18 @@ PreparedOp PrepareImpl( ...@@ -233,14 +233,18 @@ PreparedOp PrepareImpl(
auto expected_kernel_key_library_type = auto expected_kernel_key_library_type =
expected_kernel_key.library_type_; expected_kernel_key.library_type_;
expected_kernel_key.library_type_ = paddle::framework::LibraryType::kKP; expected_kernel_key.library_type_ = paddle::framework::LibraryType::kKP;
VLOG(3) << "modifing XPU KP kernel: " << op.Type() VLOG(3) << "modifing XPU KP kernel: " << pt_kernel_name
<< ", using_kernel_key:" << expected_kernel_key; << ", using_kernel_key:" << expected_kernel_key;
phi::KernelKey try_pt_kernel_key = phi::KernelKey try_pt_kernel_key =
TransOpKernelTypeToPhiKernelKey(expected_kernel_key); TransOpKernelTypeToPhiKernelKey(expected_kernel_key);
if (!phi_kernel_factory.HasKernel(pt_kernel_name, try_pt_kernel_key)) { if (!phi_kernel_factory.HasKernel(pt_kernel_name, try_pt_kernel_key)) {
expected_kernel_key.library_type_ = expected_kernel_key_library_type; expected_kernel_key.library_type_ = expected_kernel_key_library_type;
VLOG(3) << "modify XPU KP kernel: " << op.Type() << " is failed " VLOG(3) << "modify XPU KP kernel: " << pt_kernel_name
<< expected_kernel_key; << " in dynamic graph is failed " << expected_kernel_key;
} else {
VLOG(3) << "modify XPU KP kernel: " << pt_kernel_name
<< " in dynamic graph is succeed " << expected_kernel_key;
} }
} }
} }
...@@ -332,7 +336,7 @@ PreparedOp PrepareImpl( ...@@ -332,7 +336,7 @@ PreparedOp PrepareImpl(
#if defined(PADDLE_WITH_XPU) && !defined(PADDLE_WITH_XPU_KP) #if defined(PADDLE_WITH_XPU) && !defined(PADDLE_WITH_XPU_KP)
if (paddle::platform::is_xpu_place(expected_kernel_key.place_) && if (paddle::platform::is_xpu_place(expected_kernel_key.place_) &&
(kernel_iter == kernels.end() || is_xpu_unsupport)) { (kernel_iter == kernels.end() || is_xpu_unsupport)) {
VLOG(3) << "missing XPU kernel: " << op.Type() VLOG(3) << "fluid missing XPU kernel: " << op.Type()
<< ", expected_kernel_key:" << expected_kernel_key << ", expected_kernel_key:" << expected_kernel_key
<< ", fallbacking to CPU one!"; << ", fallbacking to CPU one!";
expected_kernel_key.place_ = platform::CPUPlace(); expected_kernel_key.place_ = platform::CPUPlace();
...@@ -343,20 +347,20 @@ PreparedOp PrepareImpl( ...@@ -343,20 +347,20 @@ PreparedOp PrepareImpl(
#ifdef PADDLE_WITH_XPU_KP #ifdef PADDLE_WITH_XPU_KP
if (paddle::platform::is_xpu_place(expected_kernel_key.place_)) { if (paddle::platform::is_xpu_place(expected_kernel_key.place_)) {
if (use_xpu_kp_kernel_rt) { if (use_xpu_kp_kernel_rt) {
VLOG(3) << "xpu_kp using rt mode "; VLOG(3) << "fluid xpu_kp using rt mode ";
} }
if (use_xpu_kp_kernel_debug) { if (use_xpu_kp_kernel_debug) {
VLOG(3) << "xpu_kp using debug mode "; VLOG(3) << "fluid xpu_kp using debug mode ";
} }
if (is_xpu_kp_support) { if (is_xpu_kp_support) {
expected_kernel_key.library_type_ = paddle::framework::LibraryType::kKP; expected_kernel_key.library_type_ = paddle::framework::LibraryType::kKP;
kernel_iter = kernels.find(expected_kernel_key); kernel_iter = kernels.find(expected_kernel_key);
VLOG(3) << "using XPU KP kernel: " << op.Type() VLOG(3) << "using fluid XPU KP kernel: " << op.Type()
<< ", using_kernel_key:" << expected_kernel_key; << ", using_kernel_key:" << expected_kernel_key;
} }
if (!is_xpu_kp_support && if (!is_xpu_kp_support &&
(kernel_iter == kernels.end() || is_xpu_unsupport)) { (kernel_iter == kernels.end() || is_xpu_unsupport)) {
VLOG(3) << "missing XPU kernel: " << op.Type() VLOG(3) << "fluid missing XPU kernel: " << op.Type()
<< ", expected_kernel_key:" << expected_kernel_key << ", expected_kernel_key:" << expected_kernel_key
<< ", fallbacking to CPU one!"; << ", fallbacking to CPU one!";
expected_kernel_key.place_ = platform::CPUPlace(); expected_kernel_key.place_ = platform::CPUPlace();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册