Unverified commit 7c302538, authored by huangjiyi, committed by GitHub

[PHI decoupling] remove "paddle/fluid/platform/dynload/xxx.h" in phi (#47787)

* rm "paddle/fluid/platform/dynload/cudnn.h" in phi

* rm "paddle/fluid/platform/dynload/mklml.h" in phi

* rm "paddle/fluid/platform/dynload/rocblas.h" in phi

* replace "paddle::platform::dynload::" with "phi::dynload::" in phi

* revert "blas_impl.cu.h"
Parent: c551e55d
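The change is mechanical: swap the fluid dynload header for the phi one and change the namespace prefix on the wrapped symbol. Below is a minimal sketch of that pattern, assuming an MKLML build (PADDLE_WITH_MKLML); it mirrors the CBlas&lt;float&gt;::GEMM wrapper changed in the diff, with the surrounding primary template declaration added only to make the snippet self-contained.

```cpp
// Before this commit, phi reached back into fluid for dynamically loaded symbols:
//   #include "paddle/fluid/platform/dynload/mklml.h"
//   paddle::platform::dynload::cblas_sgemm(args...);
// After this commit, phi uses its own dynload wrappers:
#include "paddle/phi/backends/dynload/mklml.h"

namespace phi {
namespace funcs {

template <typename T>
struct CBlas;  // primary template, declared here only to keep the sketch self-contained

template <>
struct CBlas<float> {
  template <typename... ARGS>
  static void GEMM(ARGS... args) {
    // The MKLML symbol is now resolved through phi::dynload instead of
    // paddle::platform::dynload, so phi no longer depends on fluid here.
    phi::dynload::cblas_sgemm(args...);
  }
};

}  // namespace funcs
}  // namespace phi
```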
@@ -18,7 +18,7 @@
#include "paddle/phi/core/dense_tensor.h"
#ifdef PADDLE_WITH_MKLML
-#include "paddle/fluid/platform/dynload/mklml.h"
+#include "paddle/phi/backends/dynload/mklml.h"
#endif
#ifdef PADDLE_WITH_LIBXSMM
...
@@ -113,27 +113,27 @@ template <>
struct CBlas<float> {
template <typename... ARGS>
static void GEMM(ARGS... args) {
-paddle::platform::dynload::cblas_sgemm(args...);
+phi::dynload::cblas_sgemm(args...);
}
template <typename... ARGS>
static float *GEMM_ALLOC(ARGS... args) {
-return paddle::platform::dynload::cblas_sgemm_alloc(args...);
+return phi::dynload::cblas_sgemm_alloc(args...);
}
template <typename... ARGS>
static void GEMM_PACK(ARGS... args) {
-paddle::platform::dynload::cblas_sgemm_pack(args...);
+phi::dynload::cblas_sgemm_pack(args...);
}
template <typename... ARGS>
static void GEMM_COMPUTE(ARGS... args) {
-paddle::platform::dynload::cblas_sgemm_compute(args...);
+phi::dynload::cblas_sgemm_compute(args...);
}
template <typename... ARGS>
static void GEMM_FREE(ARGS... args) {
-paddle::platform::dynload::cblas_sgemm_free(args...);
+phi::dynload::cblas_sgemm_free(args...);
}
#ifdef PADDLE_WITH_LIBXSMM
@@ -145,93 +145,93 @@ struct CBlas<float> {
template <typename... ARGS>
static void AXPY(ARGS... args) {
-paddle::platform::dynload::cblas_saxpy(args...);
+phi::dynload::cblas_saxpy(args...);
}
template <typename... ARGS>
static void VCOPY(ARGS... args) {
-paddle::platform::dynload::cblas_scopy(args...);
+phi::dynload::cblas_scopy(args...);
}
template <typename... ARGS>
static void GEMV(ARGS... args) {
-paddle::platform::dynload::cblas_sgemv(args...);
+phi::dynload::cblas_sgemv(args...);
}
template <typename... ARGS>
static float DOT(ARGS... args) {
-return paddle::platform::dynload::cblas_sdot(args...);
+return phi::dynload::cblas_sdot(args...);
}
template <typename... ARGS>
static void SCAL(ARGS... args) {
-paddle::platform::dynload::cblas_sscal(args...);
+phi::dynload::cblas_sscal(args...);
}
template <typename... ARGS>
static float ASUM(ARGS... args) {
-return paddle::platform::dynload::cblas_sasum(args...);
+return phi::dynload::cblas_sasum(args...);
}
template <typename... ARGS>
static void GEMM_BATCH(ARGS... args) {
-paddle::platform::dynload::cblas_sgemm_batch(args...);
+phi::dynload::cblas_sgemm_batch(args...);
}
template <typename... ARGS>
static void VADD(ARGS... args) {
-paddle::platform::dynload::vsAdd(args...);
+phi::dynload::vsAdd(args...);
}
template <typename... ARGS>
static void VSUB(ARGS... args) {
-paddle::platform::dynload::vsSub(args...);
+phi::dynload::vsSub(args...);
}
template <typename... ARGS>
static void VMUL(ARGS... args) {
-paddle::platform::dynload::vsMul(args...);
+phi::dynload::vsMul(args...);
}
template <typename... ARGS>
static void VDIV(ARGS... args) {
-paddle::platform::dynload::vsDiv(args...);
+phi::dynload::vsDiv(args...);
}
template <typename... ARGS>
static void VEXP(ARGS... args) {
-paddle::platform::dynload::vsExp(args...);
+phi::dynload::vsExp(args...);
}
template <typename... ARGS>
static void VSQUARE(ARGS... args) {
-paddle::platform::dynload::vsSqr(args...);
+phi::dynload::vsSqr(args...);
}
template <typename... ARGS>
static void VPOW(ARGS... args) {
-paddle::platform::dynload::vsPowx(args...);
+phi::dynload::vsPowx(args...);
}
template <typename... ARGS>
static void VINV(ARGS... args) {
-paddle::platform::dynload::vsInv(args...);
+phi::dynload::vsInv(args...);
}
template <typename... ARGS>
static void VMERF(ARGS... args) {
-paddle::platform::dynload::vmsErf(args...);
+phi::dynload::vmsErf(args...);
}
#if !defined(_WIN32)
template <typename... ARGS>
static void CSRMM(ARGS... args) {
-paddle::platform::dynload::mkl_scsrmm(args...);
+phi::dynload::mkl_scsrmm(args...);
}
#endif
template <typename... ARGS>
static void TRSM(ARGS... args) {
-paddle::platform::dynload::cblas_strsm(args...);
+phi::dynload::cblas_strsm(args...);
}
};
@@ -239,27 +239,27 @@ template <>
struct CBlas<double> {
template <typename... ARGS>
static void GEMM(ARGS... args) {
-paddle::platform::dynload::cblas_dgemm(args...);
+phi::dynload::cblas_dgemm(args...);
}
template <typename... ARGS>
static double *GEMM_ALLOC(ARGS... args) {
-return paddle::platform::dynload::cblas_dgemm_alloc(args...);
+return phi::dynload::cblas_dgemm_alloc(args...);
}
template <typename... ARGS>
static void GEMM_PACK(ARGS... args) {
-paddle::platform::dynload::cblas_dgemm_pack(args...);
+phi::dynload::cblas_dgemm_pack(args...);
}
template <typename... ARGS>
static void GEMM_COMPUTE(ARGS... args) {
-paddle::platform::dynload::cblas_dgemm_compute(args...);
+phi::dynload::cblas_dgemm_compute(args...);
}
template <typename... ARGS>
static void GEMM_FREE(ARGS... args) {
-paddle::platform::dynload::cblas_dgemm_free(args...);
+phi::dynload::cblas_dgemm_free(args...);
}
#ifdef PADDLE_WITH_LIBXSMM
@@ -271,93 +271,93 @@ struct CBlas<double> {
template <typename... ARGS>
static void AXPY(ARGS... args) {
-paddle::platform::dynload::cblas_daxpy(args...);
+phi::dynload::cblas_daxpy(args...);
}
template <typename... ARGS>
static void VCOPY(ARGS... args) {
-paddle::platform::dynload::cblas_dcopy(args...);
+phi::dynload::cblas_dcopy(args...);
}
template <typename... ARGS>
static void GEMV(ARGS... args) {
-paddle::platform::dynload::cblas_dgemv(args...);
+phi::dynload::cblas_dgemv(args...);
}
template <typename... ARGS>
static double DOT(ARGS... args) {
-return paddle::platform::dynload::cblas_ddot(args...);
+return phi::dynload::cblas_ddot(args...);
}
template <typename... ARGS>
static void SCAL(ARGS... args) {
-paddle::platform::dynload::cblas_dscal(args...);
+phi::dynload::cblas_dscal(args...);
}
template <typename... ARGS>
static double ASUM(ARGS... args) {
-return paddle::platform::dynload::cblas_dasum(args...);
+return phi::dynload::cblas_dasum(args...);
}
template <typename... ARGS>
static void GEMM_BATCH(ARGS... args) {
-paddle::platform::dynload::cblas_dgemm_batch(args...);
+phi::dynload::cblas_dgemm_batch(args...);
}
template <typename... ARGS>
static void VADD(ARGS... args) {
-paddle::platform::dynload::vdAdd(args...);
+phi::dynload::vdAdd(args...);
}
template <typename... ARGS>
static void VSUB(ARGS... args) {
-paddle::platform::dynload::vdSub(args...);
+phi::dynload::vdSub(args...);
}
template <typename... ARGS>
static void VMUL(ARGS... args) {
-paddle::platform::dynload::vdMul(args...);
+phi::dynload::vdMul(args...);
}
template <typename... ARGS>
static void VDIV(ARGS... args) {
-paddle::platform::dynload::vdDiv(args...);
+phi::dynload::vdDiv(args...);
}
template <typename... ARGS>
static void VEXP(ARGS... args) {
-paddle::platform::dynload::vdExp(args...);
+phi::dynload::vdExp(args...);
}
template <typename... ARGS>
static void VSQUARE(ARGS... args) {
-paddle::platform::dynload::vdSqr(args...);
+phi::dynload::vdSqr(args...);
}
template <typename... ARGS>
static void VPOW(ARGS... args) {
-paddle::platform::dynload::vdPowx(args...);
+phi::dynload::vdPowx(args...);
}
template <typename... ARGS>
static void VINV(ARGS... args) {
-paddle::platform::dynload::vdInv(args...);
+phi::dynload::vdInv(args...);
}
template <typename... ARGS>
static void VMERF(ARGS... args) {
-paddle::platform::dynload::vmdErf(args...);
+phi::dynload::vmdErf(args...);
}
#if !defined(_WIN32)
template <typename... ARGS>
static void CSRMM(ARGS... args) {
-paddle::platform::dynload::mkl_dcsrmm(args...);
+phi::dynload::mkl_dcsrmm(args...);
}
#endif
template <typename... ARGS>
static void TRSM(ARGS... args) {
-paddle::platform::dynload::cblas_dtrsm(args...);
+phi::dynload::cblas_dtrsm(args...);
}
};
@@ -370,12 +370,12 @@ struct CBlas<phi::dtype::complex<float>> {
const int incX,
phi::dtype::complex<float> *Y,
const int incY) {
-paddle::platform::dynload::cblas_caxpy(n, &alpha, X, incX, Y, incY);
+phi::dynload::cblas_caxpy(n, &alpha, X, incX, Y, incY);
}
template <typename... ARGS>
static void VCOPY(ARGS... args) {
-paddle::platform::dynload::cblas_ccopy(args...);
+phi::dynload::cblas_ccopy(args...);
}
// the libmklml_intel.so paddle used has no vcAdd, vcSub,
@@ -384,22 +384,22 @@ struct CBlas<phi::dtype::complex<float>> {
/*
template <typename... ARGS>
static void VADD(ARGS... args) {
-paddle::platform::dynload::vcAdd(args...);
+phi::dynload::vcAdd(args...);
}
template <typename... ARGS>
static void VSUB(ARGS... args) {
-paddle::platform::dynload::vcSub(args...);
+phi::dynload::vcSub(args...);
}
template <typename... ARGS>
static void VMUL(ARGS... args) {
-paddle::platform::dynload::vcMul(args...);
+phi::dynload::vcMul(args...);
}
template <typename... ARGS>
static void VDIV(ARGS... args) {
-paddle::platform::dynload::vcDiv(args...);
+phi::dynload::vcDiv(args...);
}
*/
@@ -458,7 +458,7 @@ struct CBlas<phi::dtype::complex<float>> {
const void *a_ = (const void *)(A);
const void *x_ = (const void *)(X);
void *y_ = static_cast<void *>(Y);
-paddle::platform::dynload::cblas_cgemv(
+phi::dynload::cblas_cgemv(
layout, trans, M, N, &alpha, a_, lda, x_, incx, &beta, y_, incy);
}
@@ -480,7 +480,7 @@ struct CBlas<phi::dtype::complex<float>> {
const void *a_ = (const void *)(A);
const void *b_ = (const void *)(B);
void *c_ = static_cast<void *>(C);
-paddle::platform::dynload::cblas_cgemm(layout,
+phi::dynload::cblas_cgemm(layout,
trans_a,
trans_b,
M,
@@ -510,7 +510,7 @@ struct CBlas<phi::dtype::complex<float>> {
int ldb) {
const void *a_ = (const void *)(A);
void *b_ = static_cast<void *>(B);
-paddle::platform::dynload::cblas_ctrsm(
+phi::dynload::cblas_ctrsm(
layout, side, uplo, trans_a, diag, M, N, &alpha, a_, lda, b_, ldb);
}
@@ -535,7 +535,7 @@ struct CBlas<phi::dtype::complex<float>> {
const void **B_void = (const void **)(&(*B));
void **C_void = reinterpret_cast<void **>(C);
-paddle::platform::dynload::cblas_cgemm_batch(layout,
+phi::dynload::cblas_cgemm_batch(layout,
trans_a,
trans_b,
M,
@@ -555,7 +555,7 @@ struct CBlas<phi::dtype::complex<float>> {
template <typename... ARGS>
static void GEMM_EX(ARGS... args) {
-paddle::platform::dynload::cblas_cgemm_batch(args...);
+phi::dynload::cblas_cgemm_batch(args...);
}
};
@@ -568,12 +568,12 @@ struct CBlas<phi::dtype::complex<double>> {
const int incX,
phi::dtype::complex<double> *Y,
const int incY) {
-paddle::platform::dynload::cblas_zaxpy(n, &alpha, X, incX, Y, incY);
+phi::dynload::cblas_zaxpy(n, &alpha, X, incX, Y, incY);
}
template <typename... ARGS>
static void VCOPY(ARGS... args) {
-paddle::platform::dynload::cblas_zcopy(args...);
+phi::dynload::cblas_zcopy(args...);
}
// the libmklml_intel.so paddle used has no vzAdd, vzSub,
@@ -582,22 +582,22 @@ struct CBlas<phi::dtype::complex<double>> {
/*
template <typename... ARGS>
static void VADD(ARGS... args) {
-paddle::platform::dynload::vzAdd(args...);
+phi::dynload::vzAdd(args...);
}
template <typename... ARGS>
static void VSUB(ARGS... args) {
-paddle::platform::dynload::vzSub(args...);
+phi::dynload::vzSub(args...);
}
template <typename... ARGS>
static void VMUL(ARGS... args) {
-paddle::platform::dynload::vzMul(args...);
+phi::dynload::vzMul(args...);
}
template <typename... ARGS>
static void VDIV(ARGS... args) {
-paddle::platform::dynload::vzDiv(args...);
+phi::dynload::vzDiv(args...);
}
*/
@@ -656,7 +656,7 @@ struct CBlas<phi::dtype::complex<double>> {
const void *a_ = (const void *)(A);
const void *x_ = (const void *)(X);
void *y_ = static_cast<void *>(Y);
-paddle::platform::dynload::cblas_zgemv(
+phi::dynload::cblas_zgemv(
layout, trans, M, N, &alpha, a_, lda, x_, incx, &beta, y_, incy);
}
@@ -678,7 +678,7 @@ struct CBlas<phi::dtype::complex<double>> {
const void *a_ = (const void *)(A);
const void *b_ = (const void *)(B);
void *c_ = static_cast<void *>(C);
-paddle::platform::dynload::cblas_zgemm(layout,
+phi::dynload::cblas_zgemm(layout,
trans_a,
trans_b,
M,
@@ -708,7 +708,7 @@ struct CBlas<phi::dtype::complex<double>> {
int ldb) {
const void *a_ = (const void *)(A);
void *b_ = static_cast<void *>(B);
-paddle::platform::dynload::cblas_ztrsm(
+phi::dynload::cblas_ztrsm(
layout, side, uplo, trans_a, diag, M, N, &alpha, a_, lda, b_, ldb);
}
@@ -733,7 +733,7 @@ struct CBlas<phi::dtype::complex<double>> {
const void **B_void = (const void **)(&(*B));
void **C_void = reinterpret_cast<void **>(C);
-paddle::platform::dynload::cblas_zgemm_batch(layout,
+phi::dynload::cblas_zgemm_batch(layout,
trans_a,
trans_b,
M,
@@ -753,7 +753,7 @@ struct CBlas<phi::dtype::complex<double>> {
template <typename... ARGS>
static void GEMM_EX(ARGS... args) {
-paddle::platform::dynload::cblas_zgemm_batch(args...);
+phi::dynload::cblas_zgemm_batch(args...);
}
};
...
@@ -15,7 +15,7 @@
#pragma once
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
-#include "paddle/fluid/platform/dynload/rocblas.h"
+#include "paddle/phi/backends/dynload/rocblas.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/kernels/funcs/math_function.h"
@@ -31,38 +31,33 @@ template <>
struct CUBlas<float> {
template <typename... ARGS>
static void GEMM(ARGS... args) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_sgemm(args...));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_sgemm(args...));
}
template <typename... ARGS>
static void AXPY(ARGS... args) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_saxpy(args...));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_saxpy(args...));
}
template <typename... ARGS>
static void SCAL(ARGS... args) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_sscal(args...));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_sscal(args...));
}
template <typename... ARGS>
static void VCOPY(ARGS... args) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_scopy(args...));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_scopy(args...));
}
template <typename... ARGS>
static void GEMV(ARGS... args) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_sgemv(args...));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_sgemv(args...));
}
template <typename... ARGS>
static void GEMM_STRIDED_BATCH(ARGS... args) {
PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_sgemm_strided_batched(args...));
+phi::dynload::rocblas_sgemm_strided_batched(args...));
}
// HIP not supportted, refer to the doc here:
@@ -75,8 +70,7 @@ struct CUBlas<float> {
template <typename... ARGS>
static void TRSM(ARGS... args) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_strsm(args...));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_strsm(args...));
}
template <typename... ARGS>
@@ -108,38 +102,33 @@ template <>
struct CUBlas<double> {
template <typename... ARGS>
static void GEMM(ARGS... args) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_dgemm(args...));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_dgemm(args...));
}
template <typename... ARGS>
static void AXPY(ARGS... args) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_daxpy(args...));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_daxpy(args...));
}
template <typename... ARGS>
static void SCAL(ARGS... args) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_dscal(args...));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_dscal(args...));
}
template <typename... ARGS>
static void VCOPY(ARGS... args) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_dcopy(args...));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_dcopy(args...));
}
template <typename... ARGS>
static void GEMV(ARGS... args) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_dgemv(args...));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_dgemv(args...));
}
template <typename... ARGS>
static void GEMM_STRIDED_BATCH(ARGS... args) {
PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_dgemm_strided_batched(args...));
+phi::dynload::rocblas_dgemm_strided_batched(args...));
}
template <typename... ARGS>
@@ -150,8 +139,7 @@ struct CUBlas<double> {
template <typename... ARGS>
static void TRSM(ARGS... args) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_dtrsm(args...));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_dtrsm(args...));
}
template <typename... ARGS>
@@ -197,7 +185,7 @@ struct CUBlas<phi::dtype::float16> {
const float16 *beta,
float16 *C,
int ldc) {
-PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::rocblas_hgemm(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_hgemm(
handle,
transa,
transb,
@@ -232,8 +220,7 @@ struct CUBlas<phi::dtype::float16> {
int ldc,
long long int strideC, // NOLINT
int batchCount) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_hgemm_strided_batched(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_hgemm_strided_batched(
handle,
transa,
transb,
@@ -277,8 +264,7 @@ struct CUBlas<phi::dtype::float16> {
rocblas_datatype computeType) {
rocblas_gemm_algo algo = rocblas_gemm_algo_standard;
dev_ctx->TensorCoreCublasCallIfAvailable([&](rocblas_handle handle) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_gemm_ex(handle,
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_gemm_ex(handle,
transa,
transb,
m,
@@ -320,7 +306,7 @@ struct CUBlas<phi::dtype::complex<float>> {
const phi::dtype::complex<float> *beta,
phi::dtype::complex<float> *C,
int ldc) {
-PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::rocblas_cgemv(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_cgemv(
handle,
transa,
m,
@@ -342,7 +328,7 @@ struct CUBlas<phi::dtype::complex<float>> {
const int incX,
phi::dtype::complex<float> *Y,
const int incY) {
-PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::rocblas_caxpy(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_caxpy(
handle,
n,
reinterpret_cast<const rocblas_float_complex *>(alpha),
@@ -370,8 +356,7 @@ struct CUBlas<phi::dtype::complex<float>> {
int ldc,
long long int strideC, // NOLINT
int batchCount) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_cgemm_strided_batched(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_cgemm_strided_batched(
handle,
transa,
transb,
@@ -406,7 +391,7 @@ struct CUBlas<phi::dtype::complex<float>> {
const phi::dtype::complex<float> *beta,
phi::dtype::complex<float> *C,
int ldc) {
-PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::rocblas_cgemm(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_cgemm(
handle,
transa,
transb,
@@ -446,8 +431,7 @@ struct CUBlas<phi::dtype::complex<float>> {
rocblas_datatype computeType) {
rocblas_gemm_algo algo = rocblas_gemm_algo_standard;
dev_ctx->TensorCoreCublasCallIfAvailable([&](rocblas_handle handle) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_gemm_ex(handle,
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_gemm_ex(handle,
transa,
transb,
m,
@@ -489,7 +473,7 @@ struct CUBlas<phi::dtype::complex<double>> {
const phi::dtype::complex<double> *beta,
phi::dtype::complex<double> *C,
int ldc) {
-PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::rocblas_zgemv(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_zgemv(
handle,
transa,
m,
@@ -511,7 +495,7 @@ struct CUBlas<phi::dtype::complex<double>> {
const int incX,
phi::dtype::complex<double> *Y,
const int incY) {
-PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::rocblas_zaxpy(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_zaxpy(
handle,
n,
reinterpret_cast<const rocblas_double_complex *>(alpha),
@@ -540,8 +524,7 @@ struct CUBlas<phi::dtype::complex<double>> {
int ldc,
long long int strideC, // NOLINT
int batchCount) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_zgemm_strided_batched(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_zgemm_strided_batched(
handle,
transa,
transb,
@@ -576,7 +559,7 @@ struct CUBlas<phi::dtype::complex<double>> {
const phi::dtype::complex<double> *beta,
phi::dtype::complex<double> *C,
int ldc) {
-PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::rocblas_zgemm(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_zgemm(
handle,
transa,
transb,
@@ -616,8 +599,7 @@ struct CUBlas<phi::dtype::complex<double>> {
rocblas_datatype computeType) {
rocblas_gemm_algo algo = rocblas_gemm_algo_standard;
dev_ctx->TensorCoreCublasCallIfAvailable([&](rocblas_handle handle) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_gemm_ex(handle,
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_gemm_ex(handle,
transa,
transb,
m,
@@ -778,7 +760,7 @@ inline void Blas<phi::GPUContext>::GEMM(CBLAS_TRANSPOSE transA,
context_.TensorCoreCublasCallIfAvailable([&](rocblas_handle handle) {
PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_gemm_ex(handle,
+phi::dynload::rocblas_gemm_ex(handle,
cuTransB,
cuTransA,
N,
@@ -1159,7 +1141,7 @@ inline void Blas<phi::GPUContext>::BatchedGEMM(CBLAS_TRANSPOSE transA,
const int64_t strideC = M * N;
context_.CublasCall([&](rocblas_handle handle) {
PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_sgemm_strided_batched(handle,
+phi::dynload::rocblas_sgemm_strided_batched(handle,
cuTransB,
cuTransA,
N,
@@ -1209,7 +1191,7 @@ inline void Blas<phi::GPUContext>::BatchedGEMM(CBLAS_TRANSPOSE transA,
const int64_t strideC = M * N;
context_.CublasCall([&](rocblas_handle handle) {
PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_dgemm_strided_batched(handle,
+phi::dynload::rocblas_dgemm_strided_batched(handle,
cuTransB,
cuTransA,
N,
@@ -1261,8 +1243,7 @@ inline void Blas<phi::GPUContext>::BatchedGEMM(CBLAS_TRANSPOSE transA,
context_.TensorCoreCublasCallIfAvailable([&](rocblas_handle handle) {
PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::rocblas_gemm_strided_batched_ex(
-handle,
+phi::dynload::rocblas_gemm_strided_batched_ex(handle,
cuTransB,
cuTransA,
N,
...
@@ -21,7 +21,7 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_MKLML
-#include "paddle/fluid/platform/dynload/mklml.h"
+#include "paddle/phi/backends/dynload/mklml.h"
#endif
namespace phi {
@@ -60,23 +60,23 @@ inline void vec_exp<float>(const int n, const float* x, float* y) {
y[i] = std::exp(x[i]);
}
} else {
-paddle::platform::dynload::vsExp(n, x, y);
+phi::dynload::vsExp(n, x, y);
}
}
template <>
inline void vec_exp<double>(const int n, const double* x, double* y) {
-paddle::platform::dynload::vdExp(n, x, y);
+phi::dynload::vdExp(n, x, y);
}
template <>
inline void vec_scal<float>(const int n, const float a, float* x) {
-paddle::platform::dynload::cblas_sscal(n, a, x, 1);
+phi::dynload::cblas_sscal(n, a, x, 1);
}
template <>
inline void vec_scal<double>(const int n, const double a, double* x) {
-paddle::platform::dynload::cblas_dscal(n, a, x, 1);
+phi::dynload::cblas_dscal(n, a, x, 1);
}
#endif
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include "paddle/phi/kernels/funcs/math_function.h"
#ifdef PADDLE_WITH_MKLML
-#include "paddle/fluid/platform/dynload/mklml.h"
+#include "paddle/phi/backends/dynload/mklml.h"
#endif
#ifdef PADDLE_USE_OPENBLAS
...
@@ -102,14 +102,12 @@ class RNNDescriptors {
if (!is_test_ && !is_initialized) {
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::miopenDropoutGetStatesSize(handle,
-&state_size));
+phi::dynload::miopenDropoutGetStatesSize(handle, &state_size));
dropout_state->mutable_data<uint8_t>({static_cast<int64_t>(state_size)},
place);
#else
PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::cudnnDropoutGetStatesSize(handle,
-&state_size));
+phi::dynload::cudnnDropoutGetStatesSize(handle, &state_size));
dropout_state->mutable_data<uint8_t>({static_cast<int64_t>(state_size)},
place);
#endif
@@ -124,8 +122,7 @@ class RNNDescriptors {
// ------------------- cudnn rnn descriptors ---------------------
#ifdef PADDLE_WITH_HIP
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::miopenSetRNNDescriptor_V2(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetRNNDescriptor_V2(
rnn_desc_.desc(),
hidden_size_,
num_layers_,
@@ -137,8 +134,7 @@ class RNNDescriptors {
miopenRNNdefault,
cudnn_type));
#elif CUDNN_VERSION >= 6000
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::cudnnSetRNNDescriptor_v6(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetRNNDescriptor_v6(
handle,
rnn_desc_.desc(),
hidden_size_,
@@ -150,7 +146,7 @@ class RNNDescriptors {
CUDNN_RNN_ALGO_STANDARD,
cudnn_type));
#else
-PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cudnnSetRNNDescriptor(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetRNNDescriptor(
rnn_desc_.desc(),
hidden_size_,
num_layers_,
@@ -163,8 +159,7 @@
#if defined(PADDLE_WITH_CUDA) && CUDNN_VERSION >= 7201
if (!sequence_length.empty()) {
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::cudnnSetRNNPaddingMode(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetRNNPaddingMode(
rnn_desc_.desc(), CUDNN_RNN_PADDED_IO_ENABLED));
}
#endif
@@ -172,11 +167,10 @@
// ------------------- cudnn weights_size ---------------------
size_t weights_size_;
#ifdef PADDLE_WITH_HIP
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::miopenGetRNNParamsSize(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenGetRNNParamsSize(
handle, rnn_desc_.desc(), x_descs_[0], &weights_size_, cudnn_type));
#else
-PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cudnnGetRNNParamsSize(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnGetRNNParamsSize(
handle, rnn_desc_.desc(), x_descs_[0], &weights_size_, cudnn_type));
#endif
PADDLE_ENFORCE_EQ(
@@ -192,32 +186,22 @@ class RNNDescriptors {
// ------------------- cudnn workspace, reserve size ---------------------
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::miopenGetRNNWorkspaceSize(handle,
+phi::dynload::miopenGetRNNWorkspaceSize(handle,
rnn_desc_.desc(),
seq_length_,
x_descs_.data(),
workspace_size));
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::miopenGetRNNTrainingReserveSize(
-handle,
-rnn_desc_.desc(),
-seq_length_,
-x_descs_.data(),
-reserve_size));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenGetRNNTrainingReserveSize(
+handle, rnn_desc_.desc(), seq_length_, x_descs_.data(), reserve_size));
#else
PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::cudnnGetRNNWorkspaceSize(handle,
+phi::dynload::cudnnGetRNNWorkspaceSize(handle,
rnn_desc_.desc(),
seq_length_,
x_descs_.data(),
workspace_size));
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::cudnnGetRNNTrainingReserveSize(
-handle,
-rnn_desc_.desc(),
-seq_length_,
-x_descs_.data(),
-reserve_size));
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnGetRNNTrainingReserveSize(
+handle, rnn_desc_.desc(), seq_length_, x_descs_.data(), reserve_size));
#endif
}
#ifdef PADDLE_WITH_HIP
...
@@ -434,8 +434,8 @@ void SyncBatchNormGradFunctor(
int dtype = paddle::platform::ToNCCLDataType(
paddle::framework::TransToProtoVarType(scale.dtype()));
// In-place operation
-PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::ncclAllReduce(
-stats,
+PADDLE_ENFORCE_GPU_SUCCESS(
+phi::dynload::ncclAllReduce(stats,
stats,
2 * C + 1,
static_cast<ncclDataType_t>(dtype),
...
@@ -1063,7 +1063,7 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx,
auto mode = axis == rank - 1 ? MIOPEN_SOFTMAX_MODE_INSTANCE
: MIOPEN_SOFTMAX_MODE_CHANNEL;
auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE;
-PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::miopenSoftmaxForward_V2(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2(
handle,
paddle::platform::CudnnDataType<T>::kOne(),
desc,
@@ -1078,7 +1078,7 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx,
auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE
: CUDNN_SOFTMAX_MODE_CHANNEL;
auto algo = log_mode ? CUDNN_SOFTMAX_LOG : CUDNN_SOFTMAX_ACCURATE;
-PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cudnnSoftmaxForward(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSoftmaxForward(
handle,
algo,
mode,
@@ -1135,8 +1135,7 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx,
auto mode = axis == rank - 1 ? MIOPEN_SOFTMAX_MODE_INSTANCE
: MIOPEN_SOFTMAX_MODE_CHANNEL;
auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE;
-PADDLE_ENFORCE_GPU_SUCCESS(
-paddle::platform::dynload::miopenSoftmaxBackward_V2(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxBackward_V2(
handle,
paddle::platform::CudnnDataType<T>::kOne(),
desc,
@@ -1153,7 +1152,7 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx,
auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE
: CUDNN_SOFTMAX_MODE_CHANNEL;
auto algo = log_mode ? CUDNN_SOFTMAX_LOG : CUDNN_SOFTMAX_ACCURATE;
-PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cudnnSoftmaxBackward(
+PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSoftmaxBackward(
handle,
algo,
mode,
...
@@ -24,9 +24,9 @@
#endif
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
-#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/profiler.h"
+#include "paddle/phi/backends/dynload/cudnn.h"
#include "paddle/phi/kernels/cpu/conv_util.h"
#include "paddle/phi/kernels/funcs/batch_norm_utils.h"
#include "paddle/phi/kernels/funcs/padding.h"
...