Unverified · Commit 5da1a27b authored by sneaxiy, committed by GitHub

Remove fluid deps in fused_linear_param_grad_add_kernel.cu (#51975)

* remove fluid deps in fused_linear_param_grad_add_kernel

* fix compile error

* fix ut error

* follow comments
Parent 101c9bb0
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -14,40 +14,4 @@
 #pragma once
-#include <type_traits>
-#include <utility>
-#include "paddle/fluid/platform/macros.h"
-
-namespace paddle {
-namespace framework {
-
-template <typename ReleaseCallback>
-class ScopeGuard {
- public:
-  explicit ScopeGuard(const ReleaseCallback &callback) : callback_(callback) {}
-
-  ~ScopeGuard() { callback_(); }
-
- private:
-  DISABLE_COPY_AND_ASSIGN(ScopeGuard);
-
- private:
-  ReleaseCallback callback_;
-};
-
-// Two macros are needed here.
-// See:
-// https://stackoverflow.com/questions/10379691/creating-macro-using-line-for-different-variable-names
-#define _PADDLE_CONCAT_TOKEN(x, y) x##y
-#define PADDLE_CONCAT_TOKEN(x, y) _PADDLE_CONCAT_TOKEN(x, y)
-
-#define DEFINE_PADDLE_SCOPE_GUARD(...)                                    \
-  auto PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__) = __VA_ARGS__;   \
-  ::paddle::framework::ScopeGuard<typename std::remove_reference<         \
-      decltype(PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__))>::type> \
-      PADDLE_CONCAT_TOKEN(__scope_guard, __LINE__)(                       \
-          PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__))
-
-}  // namespace framework
-}  // namespace paddle
+#include "paddle/phi/core/scope_guard.h"
@@ -14,13 +14,13 @@ limitations under the License. */
 #pragma once
-#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
 #include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h"
 #include "paddle/phi/kernels/funcs/broadcast_function.h"
 #include "paddle/phi/kernels/funcs/elementwise_functor.h"
+#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
 #include "paddle/phi/kernels/primitive/kernel_primitives.h"
 namespace paddle {
@@ -129,7 +129,7 @@ class AttnMatMul {
       bool fused = false) {
 #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 11060
     if (compute_bias_ && fused) {
-      ComputeFusedGemmEpilogueBackward<T>(dev_ctx_,
+      phi::funcs::ComputeFusedGemmEpilogueBackward<T>(dev_ctx_,
                                           d_output,
                                           input,
                                           weight,
 ......
@@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/op_version_registry.h"
+#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
 namespace paddle {
 namespace operators {
 ......
@@ -13,12 +13,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h"
+#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
 namespace paddle {
 namespace operators {
@@ -151,7 +151,7 @@ class FusedGemmEpilogueGradKernel : public framework::OpKernel<T> {
             << ", activation=" << activation_grad
             << ", reserve_space=" << reserve_space;
-    ComputeFusedGemmEpilogueBackward<T>(dev_ctx,
+    phi::funcs::ComputeFusedGemmEpilogueBackward<T>(dev_ctx,
                                         dout,
                                         x,
                                         y,
 ......
@@ -30,11 +30,11 @@ limitations under the License. */
 #include "paddle/fluid/operators/fused/attn_gemm.h"
 #include "paddle/fluid/operators/fused/fmha_ref.h"
 #include "paddle/fluid/operators/fused/fused_dropout_helper.h"
-#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 #include "paddle/fluid/platform/dynload/cublasLt.h"
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/backends/gpu/gpu_device_function.h"
+#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
@@ -1871,7 +1871,8 @@ class CublasFusedMLP {
     const auto *x_data = x->data<T>();
     const auto *w_data = weight->data<T>();
-    auto algo = GemmEpilogueAlgoCache::Instance().GetGemmAlgo(lt_handle,
+    auto algo = phi::funcs::GemmEpilogueAlgoCache::Instance().GetGemmAlgo(
+        lt_handle,
         operation_desc_,
         w_desc_,
         x_desc_,
 ......
@@ -726,6 +726,16 @@
   optional : skip_update, master_params
   inplace : (params -> params_out), (moments1 -> moments1_out), (moments2 -> moments2_out), (beta1_pows -> beta1_pows_out), (beta2_pows -> beta2_pows_out), (master_params -> master_params_out)
+- op : fused_linear_param_grad_add
+  args : (Tensor x, Tensor dout, Tensor dweight, Tensor dbias, bool multi_precision = true)
+  output : Tensor(dweight_out), Tensor(dbias_out)
+  infer_meta:
+    func : FusedLinearParamGradAddInferMeta
+    optional : dweight, dbias
+  kernel:
+    func : fused_linear_param_grad_add
+    data_type : dout
+
 - op : gather
   args : (Tensor x, Tensor index, Scalar(int) axis=0)
   output : Tensor(out)
 ......
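As a rough orientation for the fused_linear_param_grad_add entry added above: by the usual phi convention, YAML args become kernel inputs, names listed under optional become paddle::optional parameters, and each output becomes an out-pointer. The declaration below is only an illustrative sketch of that mapping; the authoritative signature lives in fused_linear_param_grad_add_kernel.h and may differ.

// Illustrative sketch of the kernel declaration the YAML entry registers;
// the exact signature in fused_linear_param_grad_add_kernel.h may differ.
template <typename T, typename Context>
void FusedLinearParamGradAdd(const Context &ctx,
                             const DenseTensor &x,
                             const DenseTensor &dout,
                             const paddle::optional<DenseTensor> &dweight,
                             const paddle::optional<DenseTensor> &dbias,
                             bool multi_precision,
                             DenseTensor *dweight_out,
                             DenseTensor *dbias_out);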
@@ -614,16 +614,6 @@
   data_type : x
   backward : fused_dropout_add_grad
-- op : fused_linear_param_grad_add
-  args : (Tensor x, Tensor dout, Tensor dweight, Tensor dbias, bool multi_precision = true)
-  output : Tensor(dweight_out), Tensor(dbias_out)
-  infer_meta:
-    func : FusedLinearParamGradAddInferMeta
-    optional : dweight, dbias
-  kernel:
-    func : fused_linear_param_grad_add
-    data_type : dout
-
 - op : gather_nd
   args : (Tensor x, Tensor index)
   output : Tensor
 ......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <type_traits>
#include <utility>
#include "paddle/phi/core/macros.h"
namespace phi {
template <typename ReleaseCallback>
class ScopeGuard {
public:
explicit ScopeGuard(const ReleaseCallback &callback) : callback_(callback) {}
~ScopeGuard() { callback_(); }
private:
DISABLE_COPY_AND_ASSIGN(ScopeGuard);
private:
ReleaseCallback callback_;
};
// Two macros are needed here.
// See:
// https://stackoverflow.com/questions/10379691/creating-macro-using-line-for-different-variable-names
#define _PADDLE_CONCAT_TOKEN(x, y) x##y
#define PADDLE_CONCAT_TOKEN(x, y) _PADDLE_CONCAT_TOKEN(x, y)
#define DEFINE_PADDLE_SCOPE_GUARD(...) \
auto PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__) = __VA_ARGS__; \
::phi::ScopeGuard<typename std::remove_reference< \
decltype(PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__))>::type> \
PADDLE_CONCAT_TOKEN(__scope_guard, __LINE__)( \
PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__))
} // namespace phi
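For orientation, a minimal usage sketch of the relocated guard follows; the calling function and buffer are hypothetical and not part of this commit. The two-level token-pasting macros above exist so that __LINE__ is expanded before concatenation, giving each guard in a function a unique variable name.

// Hypothetical caller, for illustration only (not part of this commit).
#include "paddle/phi/core/scope_guard.h"

void Example() {
  int *buffer = new int[128];
  // The lambda is stored in a ScopeGuard and runs when Example() exits,
  // whether by return or by exception, so the buffer is always released.
  DEFINE_PADDLE_SCOPE_GUARD([&] { delete[] buffer; });
  // ... use buffer ...
}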
@@ -15,7 +15,7 @@
 #include "paddle/phi/kernels/fusion/fused_linear_param_grad_add_kernel.h"
 #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 11060
-#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
+#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
 #endif
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/common/data_type.h"
@@ -41,7 +41,7 @@ void FusedLinearParamGradAddImpl(const Context &ctx,
   const bool fuse_bias_grad = kIsMultiPrecision && dweight_out;
   if (dweight_out) {
-    paddle::operators::ComputeFusedGemmEpilogueBackward<T, T, MT>(
+    phi::funcs::ComputeFusedGemmEpilogueBackward<T, T, MT>(
         ctx,
         &dout,
         &x,
@@ -184,10 +184,6 @@ void FusedLinearParamGradAdd(const Context &ctx,
     FusedLinearParamGradAddImpl<T, T, Context>(
         ctx, x, dout, dbias, M, K, N, use_addto, dweight_out, dbias_out);
   }
-
-  if (VLOG_IS_ON(kLogLevel)) {
-    ctx.Wait();
-  }
 }
 #else
 ......