Unverified · Commit 5da1a27b authored by sneaxiy, committed by GitHub

Remove fluid deps in fused_linear_param_grad_add_kernel.cu (#51975)

* remove fluid deps in fused_linear_param_grad_add_kernel

* fix compile error

* fix ut error

* follow comments
Parent 101c9bb0
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -14,40 +14,4 @@
 #pragma once
-#include <type_traits>
-#include <utility>
-#include "paddle/fluid/platform/macros.h"
-
-namespace paddle {
-namespace framework {
-
-template <typename ReleaseCallback>
-class ScopeGuard {
- public:
-  explicit ScopeGuard(const ReleaseCallback &callback) : callback_(callback) {}
-
-  ~ScopeGuard() { callback_(); }
-
- private:
-  DISABLE_COPY_AND_ASSIGN(ScopeGuard);
-
- private:
-  ReleaseCallback callback_;
-};
-
-// Two macros are needed here.
-// See:
-// https://stackoverflow.com/questions/10379691/creating-macro-using-line-for-different-variable-names
-#define _PADDLE_CONCAT_TOKEN(x, y) x##y
-#define PADDLE_CONCAT_TOKEN(x, y) _PADDLE_CONCAT_TOKEN(x, y)
-
-#define DEFINE_PADDLE_SCOPE_GUARD(...)                                    \
-  auto PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__) = __VA_ARGS__;   \
-  ::paddle::framework::ScopeGuard<typename std::remove_reference<         \
-      decltype(PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__))>::type> \
-      PADDLE_CONCAT_TOKEN(__scope_guard, __LINE__)(                       \
-          PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__))
-
-}  // namespace framework
-}  // namespace paddle
+#include "paddle/phi/core/scope_guard.h"
@@ -14,13 +14,13 @@ limitations under the License. */
 #pragma once
-#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
 #include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h"
 #include "paddle/phi/kernels/funcs/broadcast_function.h"
 #include "paddle/phi/kernels/funcs/elementwise_functor.h"
+#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
 #include "paddle/phi/kernels/primitive/kernel_primitives.h"
 namespace paddle {
@@ -129,7 +129,7 @@ class AttnMatMul {
       bool fused = false) {
 #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 11060
     if (compute_bias_ && fused) {
-      ComputeFusedGemmEpilogueBackward<T>(dev_ctx_,
+      phi::funcs::ComputeFusedGemmEpilogueBackward<T>(dev_ctx_,
                                           d_output,
                                           input,
                                           weight,
 ......
@@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/op_version_registry.h"
+#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
 namespace paddle {
 namespace operators {
 ......
@@ -13,12 +13,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h"
+#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
 namespace paddle {
 namespace operators {
@@ -151,7 +151,7 @@ class FusedGemmEpilogueGradKernel : public framework::OpKernel<T> {
             << ", activation=" << activation_grad
             << ", reserve_space=" << reserve_space;
-    ComputeFusedGemmEpilogueBackward<T>(dev_ctx,
+    phi::funcs::ComputeFusedGemmEpilogueBackward<T>(dev_ctx,
                                         dout,
                                         x,
                                         y,
 ......
@@ -30,11 +30,11 @@ limitations under the License. */
 #include "paddle/fluid/operators/fused/attn_gemm.h"
 #include "paddle/fluid/operators/fused/fmha_ref.h"
 #include "paddle/fluid/operators/fused/fused_dropout_helper.h"
-#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 #include "paddle/fluid/platform/dynload/cublasLt.h"
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/backends/gpu/gpu_device_function.h"
+#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
@@ -1871,7 +1871,8 @@ class CublasFusedMLP {
     const auto *x_data = x->data<T>();
     const auto *w_data = weight->data<T>();
-    auto algo = GemmEpilogueAlgoCache::Instance().GetGemmAlgo(lt_handle,
+    auto algo = phi::funcs::GemmEpilogueAlgoCache::Instance().GetGemmAlgo(
+        lt_handle,
         operation_desc_,
         w_desc_,
         x_desc_,
 ......
@@ -726,6 +726,16 @@
   optional : skip_update, master_params
   inplace : (params -> params_out), (moments1 -> moments1_out), (moments2 -> moments2_out), (beta1_pows -> beta1_pows_out), (beta2_pows -> beta2_pows_out), (master_params -> master_params_out)
+- op : fused_linear_param_grad_add
+  args : (Tensor x, Tensor dout, Tensor dweight, Tensor dbias, bool multi_precision = true)
+  output : Tensor(dweight_out), Tensor(dbias_out)
+  infer_meta:
+    func : FusedLinearParamGradAddInferMeta
+    optional : dweight, dbias
+  kernel:
+    func : fused_linear_param_grad_add
+    data_type : dout
+
 - op : gather
   args : (Tensor x, Tensor index, Scalar(int) axis=0)
   output : Tensor(out)
 ......
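As a rough orientation for the fused_linear_param_grad_add entry added above: by the usual phi convention, YAML args become kernel inputs, names listed under optional become paddle::optional parameters, and each output becomes an out-pointer. The declaration below is only an illustrative sketch of that mapping; the authoritative signature lives in fused_linear_param_grad_add_kernel.h and may differ.

// Illustrative sketch of the kernel declaration the YAML entry registers;
// the exact signature in fused_linear_param_grad_add_kernel.h may differ.
template <typename T, typename Context>
void FusedLinearParamGradAdd(const Context &ctx,
                             const DenseTensor &x,
                             const DenseTensor &dout,
                             const paddle::optional<DenseTensor> &dweight,
                             const paddle::optional<DenseTensor> &dbias,
                             bool multi_precision,
                             DenseTensor *dweight_out,
                             DenseTensor *dbias_out);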
@@ -614,16 +614,6 @@
   data_type : x
   backward : fused_dropout_add_grad
-- op : fused_linear_param_grad_add
-  args : (Tensor x, Tensor dout, Tensor dweight, Tensor dbias, bool multi_precision = true)
-  output : Tensor(dweight_out), Tensor(dbias_out)
-  infer_meta:
-    func : FusedLinearParamGradAddInferMeta
-    optional : dweight, dbias
-  kernel:
-    func : fused_linear_param_grad_add
-    data_type : dout
-
 - op : gather_nd
   args : (Tensor x, Tensor index)
   output : Tensor
 ......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <type_traits>
#include <utility>
#include "paddle/phi/core/macros.h"
namespace phi {
template <typename ReleaseCallback>
class ScopeGuard {
public:
explicit ScopeGuard(const ReleaseCallback &callback) : callback_(callback) {}
~ScopeGuard() { callback_(); }
private:
DISABLE_COPY_AND_ASSIGN(ScopeGuard);
private:
ReleaseCallback callback_;
};
// Two macros are needed here.
// See:
// https://stackoverflow.com/questions/10379691/creating-macro-using-line-for-different-variable-names
#define _PADDLE_CONCAT_TOKEN(x, y) x##y
#define PADDLE_CONCAT_TOKEN(x, y) _PADDLE_CONCAT_TOKEN(x, y)
#define DEFINE_PADDLE_SCOPE_GUARD(...) \
auto PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__) = __VA_ARGS__; \
::phi::ScopeGuard<typename std::remove_reference< \
decltype(PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__))>::type> \
PADDLE_CONCAT_TOKEN(__scope_guard, __LINE__)( \
PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__))
} // namespace phi
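For orientation, a minimal usage sketch of the relocated guard follows; the calling function and buffer are hypothetical and not part of this commit. The two-level token-pasting macros above exist so that __LINE__ is expanded before concatenation, giving each guard in a function a unique variable name.

// Hypothetical caller, for illustration only (not part of this commit).
#include "paddle/phi/core/scope_guard.h"

void Example() {
  int *buffer = new int[128];
  // The lambda is stored in a ScopeGuard and runs when Example() exits,
  // whether by return or by exception, so the buffer is always released.
  DEFINE_PADDLE_SCOPE_GUARD([&] { delete[] buffer; });
  // ... use buffer ...
}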
@@ -15,7 +15,7 @@
 #include "paddle/phi/kernels/fusion/fused_linear_param_grad_add_kernel.h"
 #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 11060
-#include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h"
+#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"
 #endif
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/common/data_type.h"
@@ -41,7 +41,7 @@ void FusedLinearParamGradAddImpl(const Context &ctx,
   const bool fuse_bias_grad = kIsMultiPrecision && dweight_out;
   if (dweight_out) {
-    paddle::operators::ComputeFusedGemmEpilogueBackward<T, T, MT>(
+    phi::funcs::ComputeFusedGemmEpilogueBackward<T, T, MT>(
         ctx,
         &dout,
         &x,
@@ -184,10 +184,6 @@ void FusedLinearParamGradAdd(const Context &ctx,
     FusedLinearParamGradAddImpl<T, T, Context>(
         ctx, x, dout, dbias, M, K, N, use_addto, dweight_out, dbias_out);
   }
-
-  if (VLOG_IS_ON(kLogLevel)) {
-    ctx.Wait();
-  }
 }
 #else
 ......