Unverified commit 5da1a27b authored by sneaxiy, committed by GitHub

Remove fluid deps in fused_linear_param_grad_add_kernel.cu (#51975)

* remove fluid deps in fused_linear_param_grad_add_kernel

* fix compile error

* fix ut error

* follow comments
Parent: 101c9bb0
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -14,40 +14,4 @@
#pragma once
#include <type_traits>
#include <utility>
#include "paddle/fluid/platform/macros.h"
namespace paddle {
namespace framework {
template <typename ReleaseCallback>
class ScopeGuard {
public:
explicit ScopeGuard(const ReleaseCallback &callback) : callback_(callback) {}
~ScopeGuard() { callback_(); }
private:
DISABLE_COPY_AND_ASSIGN(ScopeGuard);
private:
ReleaseCallback callback_;
};
// Two macros are needed here.
// See:
// https://stackoverflow.com/questions/10379691/creating-macro-using-line-for-different-variable-names
#define _PADDLE_CONCAT_TOKEN(x, y) x##y
#define PADDLE_CONCAT_TOKEN(x, y) _PADDLE_CONCAT_TOKEN(x, y)
#define DEFINE_PADDLE_SCOPE_GUARD(...) \
auto PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__) = __VA_ARGS__; \
::paddle::framework::ScopeGuard<typename std::remove_reference< \
decltype(PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__))>::type> \
PADDLE_CONCAT_TOKEN(__scope_guard, __LINE__)( \
PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__))
} // namespace framework
} // namespace paddle
#include "paddle/phi/core/scope_guard.h"
@@ -14,13 +14,13 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h"
#include "paddle/phi/kernels/funcs/broadcast_function.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
#include "paddle/phi/kernels/primitive/kernel_primitives.h"
namespace paddle {
@@ -129,7 +129,7 @@ class AttnMatMul {
bool fused = false) {
#if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 11060
if (compute_bias_ && fused) {
ComputeFusedGemmEpilogueBackward<T>(dev_ctx_,
phi::funcs::ComputeFusedGemmEpilogueBackward<T>(dev_ctx_,
d_output,
input,
weight,
......
@@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
namespace paddle {
namespace operators {
......
@@ -13,12 +13,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h"
#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
namespace paddle {
namespace operators {
@@ -151,7 +151,7 @@ class FusedGemmEpilogueGradKernel : public framework::OpKernel<T> {
<< ", activation=" << activation_grad
<< ", reserve_space=" << reserve_space;
ComputeFusedGemmEpilogueBackward<T>(dev_ctx,
phi::funcs::ComputeFusedGemmEpilogueBackward<T>(dev_ctx,
dout,
x,
y,
......
@@ -30,11 +30,11 @@ limitations under the License. */
#include "paddle/fluid/operators/fused/attn_gemm.h"
#include "paddle/fluid/operators/fused/fmha_ref.h"
#include "paddle/fluid/operators/fused/fused_dropout_helper.h"
#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/fluid/platform/dynload/cublasLt.h"
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
@@ -1871,7 +1871,8 @@ class CublasFusedMLP {
const auto *x_data = x->data<T>();
const auto *w_data = weight->data<T>();
auto algo = GemmEpilogueAlgoCache::Instance().GetGemmAlgo(lt_handle,
auto algo = phi::funcs::GemmEpilogueAlgoCache::Instance().GetGemmAlgo(
lt_handle,
operation_desc_,
w_desc_,
x_desc_,
......
@@ -726,6 +726,16 @@
optional : skip_update, master_params
inplace : (params -> params_out), (moments1 -> moments1_out), (moments2 -> moments2_out), (beta1_pows -> beta1_pows_out), (beta2_pows -> beta2_pows_out), (master_params -> master_params_out)
- op : fused_linear_param_grad_add
args : (Tensor x, Tensor dout, Tensor dweight, Tensor dbias, bool multi_precision = true)
output : Tensor(dweight_out), Tensor(dbias_out)
infer_meta:
func : FusedLinearParamGradAddInferMeta
optional : dweight, dbias
kernel:
func : fused_linear_param_grad_add
data_type : dout
- op : gather
args : (Tensor x, Tensor index, Scalar(int) axis=0)
output : Tensor(out)
......
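For orientation, a plain C++ reference sketch of what the relocated fused_linear_param_grad_add op is expected to compute, assuming it accumulates the linear layer's weight and bias gradients into the passed-in dweight/dbias buffers (the function name and loop form below are illustrative, not the actual CUDA kernel; with multi_precision=true the real kernel presumably keeps the accumulators in float even for float16/bfloat16 inputs):

// x:    [M, K]  layer input
// dout: [M, N]  gradient w.r.t. the layer output
// dweight, accumulated in place: [K, N] += x^T * dout
// dbias,   accumulated in place: [N]    += column sum of dout
void FusedLinearParamGradAddRef(const float *x, const float *dout,
                                float *dweight, float *dbias,
                                int M, int K, int N) {
  for (int k = 0; k < K; ++k) {
    for (int n = 0; n < N; ++n) {
      for (int m = 0; m < M; ++m) {
        dweight[k * N + n] += x[m * K + k] * dout[m * N + n];
      }
    }
  }
  for (int n = 0; n < N; ++n) {
    for (int m = 0; m < M; ++m) {
      dbias[n] += dout[m * N + n];
    }
  }
}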
@@ -614,16 +614,6 @@
data_type : x
backward : fused_dropout_add_grad
- op : fused_linear_param_grad_add
args : (Tensor x, Tensor dout, Tensor dweight, Tensor dbias, bool multi_precision = true)
output : Tensor(dweight_out), Tensor(dbias_out)
infer_meta:
func : FusedLinearParamGradAddInferMeta
optional : dweight, dbias
kernel:
func : fused_linear_param_grad_add
data_type : dout
- op : gather_nd
args : (Tensor x, Tensor index)
output : Tensor
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <type_traits>
#include <utility>
#include "paddle/phi/core/macros.h"
namespace phi {
template <typename ReleaseCallback>
class ScopeGuard {
public:
explicit ScopeGuard(const ReleaseCallback &callback) : callback_(callback) {}
~ScopeGuard() { callback_(); }
private:
DISABLE_COPY_AND_ASSIGN(ScopeGuard);
private:
ReleaseCallback callback_;
};
// Two macros are needed here.
// See:
// https://stackoverflow.com/questions/10379691/creating-macro-using-line-for-different-variable-names
#define _PADDLE_CONCAT_TOKEN(x, y) x##y
#define PADDLE_CONCAT_TOKEN(x, y) _PADDLE_CONCAT_TOKEN(x, y)
#define DEFINE_PADDLE_SCOPE_GUARD(...) \
auto PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__) = __VA_ARGS__; \
::phi::ScopeGuard<typename std::remove_reference< \
decltype(PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__))>::type> \
PADDLE_CONCAT_TOKEN(__scope_guard, __LINE__)( \
PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__))
} // namespace phi
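A minimal usage sketch of the DEFINE_PADDLE_SCOPE_GUARD macro defined above (illustrative only, not part of the diff; the file name and function are made up): the macro stores the given callable and invokes it when the enclosing scope exits, so cleanup runs on every return path.

#include <cstdio>
#include "paddle/phi/core/scope_guard.h"

void WriteWithCleanup() {
  std::FILE *fp = std::fopen("example.txt", "w");
  // The guard copies the lambda and calls it in its destructor,
  // closing the file no matter how this function returns.
  DEFINE_PADDLE_SCOPE_GUARD([&] {
    if (fp != nullptr) {
      std::fclose(fp);
    }
  });
  if (fp == nullptr) {
    return;
  }
  std::fputs("hello", fp);
}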
@@ -15,7 +15,7 @@
#include "paddle/phi/kernels/fusion/fused_linear_param_grad_add_kernel.h"
#if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 11060
#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
#endif
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/data_type.h"
@@ -41,7 +41,7 @@ void FusedLinearParamGradAddImpl(const Context &ctx,
const bool fuse_bias_grad = kIsMultiPrecision && dweight_out;
if (dweight_out) {
paddle::operators::ComputeFusedGemmEpilogueBackward<T, T, MT>(
phi::funcs::ComputeFusedGemmEpilogueBackward<T, T, MT>(
ctx,
&dout,
&x,
@@ -184,10 +184,6 @@ void FusedLinearParamGradAdd(const Context &ctx,
FusedLinearParamGradAddImpl<T, T, Context>(
ctx, x, dout, dbias, M, K, N, use_addto, dweight_out, dbias_out);
}
if (VLOG_IS_ON(kLogLevel)) {
ctx.Wait();
}
}
#else
......