未验证 提交 3c1dc6f6 编写于 作者: W Wilber 提交者: GitHub

[PTEN] Move dynload from fluid to pten. (#39120)

* move dynload from fluid to pten.

* fix ci compile

* fix windows ci compile.

* update

* update

* fix compile error
上级 11938ae1
......@@ -27,7 +27,7 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/port.h"
namespace butil {
class IOBuf;
......@@ -78,11 +78,11 @@ void DeserializeFromMultiVarMsgAndIOBuf(const MultiVarMsg& multi_msg,
const framework::Scope* scope);
void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg,
butil::IOBufBytesIterator& iobuf,
butil::IOBufBytesIterator& iobuf, // NOLINT
const platform::DeviceContext& ctx);
void DeserializeSelectedRows(framework::Variable* var, const VarMsg& msg,
butil::IOBufBytesIterator& iobuf,
butil::IOBufBytesIterator& iobuf, // NOLINT
const platform::DeviceContext& ctx);
std::string GetIntTypeEndpoint(const std::string& ip, const uint32_t& port);
......
......@@ -40,9 +40,9 @@
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/pten/backends/dynload/port.h"
namespace paddle {
namespace distributed {
......@@ -202,7 +202,7 @@ class ValueBlock {
// value = _alloc.acquire(value_length_);
table[id] = value;
} else {
value = (VALUE *)(void *)(res->second);
value = (VALUE *)(void *)(res->second); // NOLINT
}
return value;
}
......@@ -282,8 +282,8 @@ class ValueBlock {
value->unseen_days_++;
if (value->unseen_days_ >= threshold) {
butil::return_object(iter->second);
//_alloc.release(iter->second);
//_alloc.release(value);
// _alloc.release(iter->second);
// _alloc.release(value);
iter = table.erase(iter);
} else {
++iter;
......
......@@ -38,8 +38,8 @@ limitations under the License. */
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/operators/reader/blocking_queue.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/timer.h"
#include "paddle/pten/backends/dynload/port.h"
namespace paddle {
namespace framework {
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "paddle/fluid/framework/io/shell.h"
#include "paddle/fluid/platform/enforce.h"
......
......@@ -34,8 +34,8 @@
#include <utility>
#include <vector>
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/pten/backends/dynload/port.h"
#if defined(__arm__) || defined(__aarch64__) || defined(__ARM_NEON) || \
defined(__ARM_NEON__)
......
......@@ -34,7 +34,7 @@ limitations under the License. */
#include "paddle/fluid/framework/trainer_desc.pb.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/operators/reader/blocking_queue.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/port.h"
namespace paddle {
namespace framework {
......
......@@ -19,7 +19,7 @@
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/port.h"
namespace paddle {
namespace inference {
......
......@@ -28,7 +28,7 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/port.h"
#ifdef _WIN32
#include <direct.h>
......
......@@ -20,7 +20,7 @@
#include <vector>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/port.h"
namespace paddle {
namespace framework {
......
......@@ -31,8 +31,8 @@
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/pten/backends/dynload/port.h"
extern std::string paddle::framework::DataTypeToString(
const framework::proto::VarType::Type type);
......
......@@ -22,8 +22,8 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/pten/backends/dynload/port.h"
DECLARE_bool(use_mkldnn);
......
......@@ -23,7 +23,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/common_infer_shape_functions.h"
#include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/port.h"
DECLARE_bool(use_mkldnn);
......
......@@ -27,7 +27,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/string_array.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/port.h"
namespace paddle {
namespace operators {
......
......@@ -20,7 +20,7 @@ limitations under the License. */
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/port.h"
#define HCOM_GROUP_PREFIX "HCOM_GROUP_"
......
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce)
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce pten_dynamic_loader)
list(APPEND CUDA_SRCS cublas.cc cublasLt.cc cudnn.cc curand.cc cusolver.cc cusparse.cc nvtx.cc cufft.cc)
......@@ -34,24 +34,24 @@ if (CUPTI_FOUND)
list(APPEND CUDA_SRCS cupti.cc)
endif(CUPTI_FOUND)
if(WITH_ROCM)
hip_library(dynload_cuda SRCS ${HIP_SRCS} DEPS dynamic_loader)
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc)
hip_library(dynload_cuda SRCS ${HIP_SRCS} DEPS dynamic_loader pten_dynload_cuda)
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc pten_dynload_warpctc)
elseif (WITH_ASCEND_CL)
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc npu_hccl)
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc npu_hccl pten_dynload_warpctc)
else()
nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader)
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc)
nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader pten_dynload_cuda)
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc pten_dynload_warpctc)
endif()
if (WITH_MKLML)
cc_library(dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml)
cc_library(dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml pten_dynload_mklml)
endif()
cc_library(dynload_lapack SRCS lapack.cc DEPS dynamic_loader)
cc_library(dynload_lapack SRCS lapack.cc DEPS dynamic_loader pten_dynload_lapack)
add_dependencies(dynload_lapack extern_lapack)
# TODO(TJ): add iomp, mkldnn?
if (MKL_FOUND AND WITH_ONEMKL)
message("ONEMKL INCLUDE directory is ${MKL_INCLUDE}")
cc_library(dynload_mklrt SRCS mklrt.cc DEPS dynamic_loader)
cc_library(dynload_mklrt SRCS mklrt.cc DEPS dynamic_loader pten_dynload_mklrt)
target_include_directories(dynload_mklrt PRIVATE ${MKL_INCLUDE})
endif()
......@@ -17,8 +17,6 @@ limitations under the License. */
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag cublas_dso_flag;
void *cublas_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
......
......@@ -20,16 +20,12 @@ limitations under the License. */
#include <mutex> // NOLINT
#include <type_traits>
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/cublas.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag cublas_dso_flag;
extern void *cublas_dso_handle;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load cublas routine
......@@ -37,19 +33,8 @@ extern void *cublas_dso_handle;
*
* note: default dynamic linked libs
*/
#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
inline auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cublas_func = \
decltype(::__name(std::declval<Args>()...)) (*)(Args...); \
std::call_once(cublas_dso_flag, []() { \
cublas_dso_handle = paddle::platform::dynload::GetCublasDsoHandle(); \
}); \
static void *p_##__name = dlsym(cublas_dso_handle, #__name); \
return reinterpret_cast<cublas_func>(p_##__name)(args...); \
} \
}; \
#define PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \
......@@ -99,7 +84,7 @@ extern void *cublas_dso_handle;
__macro(cublasSgetrsBatched); \
__macro(cublasDgetrsBatched);
CUBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
CUBLAS_BLAS_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
// APIs available after CUDA 8.0
#if CUDA_VERSION >= 8000
......@@ -111,7 +96,7 @@ CUBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
__macro(cublasZgemmStridedBatched); \
__macro(cublasHgemmStridedBatched);
CUBLAS_BLAS_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
CUBLAS_BLAS_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
#endif
// APIs available after CUDA 9.0
......@@ -120,7 +105,7 @@ CUBLAS_BLAS_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
__macro(cublasSetMathMode); \
__macro(cublasGetMathMode);
CUBLAS_BLAS_ROUTINE_EACH_R3(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
CUBLAS_BLAS_ROUTINE_EACH_R3(PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
#endif
// APIs available after CUDA 9.1
......@@ -129,10 +114,10 @@ CUBLAS_BLAS_ROUTINE_EACH_R3(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
__macro(cublasGemmBatchedEx); \
__macro(cublasGemmStridedBatchedEx);
CUBLAS_BLAS_ROUTINE_EACH_R4(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
CUBLAS_BLAS_ROUTINE_EACH_R4(PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
#endif
#undef DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP
#undef PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP
} // namespace dynload
} // namespace platform
} // namespace paddle
......@@ -17,8 +17,6 @@ limitations under the License. */
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag cublasLt_dso_flag;
void *cublasLt_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
......
......@@ -19,16 +19,12 @@ limitations under the License. */
#include <mutex> // NOLINT
#include <type_traits>
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/cublasLt.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag cublasLt_dso_flag;
extern void *cublasLt_dso_handle;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load cublasLt routine
......@@ -36,20 +32,8 @@ extern void *cublasLt_dso_handle;
*
* note: default dynamic linked libs
*/
#define DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
inline auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cublasLt_func = \
decltype(::__name(std::declval<Args>()...)) (*)(Args...); \
std::call_once(cublasLt_dso_flag, []() { \
cublasLt_dso_handle = \
paddle::platform::dynload::GetCublasLtDsoHandle(); \
}); \
static void *p_##__name = dlsym(cublasLt_dso_handle, #__name); \
return reinterpret_cast<cublasLt_func>(p_##__name)(args...); \
} \
}; \
#define PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
// APIs available after CUDA 10.1
......@@ -69,10 +53,10 @@ extern void *cublasLt_dso_handle;
__macro(cublasLtMatrixTransformDescDestroy); \
__macro(cublasLtMatrixTransformDescSetAttribute);
CUBLASLT_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP)
CUBLASLT_BLAS_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP)
// #endif
#undef DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP
#undef PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP
} // namespace dynload
} // namespace platform
} // namespace paddle
......@@ -13,14 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/dynload/cuda_driver.h"
#include "paddle/pten/backends/dynload/cuda_driver.h"
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag cuda_dso_flag;
void* cuda_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
#if CUDA_VERSION >= 10020
......@@ -28,10 +26,7 @@ CUDA_ROUTINE_EACH_VVM(DEFINE_WRAP);
#endif
CUDA_ROUTINE_EACH(DEFINE_WRAP);
bool HasCUDADriver() {
std::call_once(cuda_dso_flag, []() { cuda_dso_handle = GetCUDADsoHandle(); });
return cuda_dso_handle != nullptr;
}
bool HasCUDADriver() { return pten::dynload::HasCUDADriver(); }
} // namespace dynload
} // namespace platform
......
......@@ -17,30 +17,17 @@ limitations under the License. */
#include <cuda.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/cuda_driver.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag cuda_dso_flag;
extern void* cuda_dso_handle;
extern bool HasCUDADriver();
#define DECLARE_DYNAMIC_LOAD_CUDA_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cuda_func = decltype(&::__name); \
std::call_once(cuda_dso_flag, []() { \
cuda_dso_handle = paddle::platform::dynload::GetCUDADsoHandle(); \
}); \
static void* p_##__name = dlsym(cuda_dso_handle, #__name); \
return reinterpret_cast<cuda_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
#define PLATFORM_DECLARE_DYNAMIC_LOAD_CUDA_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
/**
* include all needed cuda driver functions
......@@ -72,12 +59,12 @@ extern bool HasCUDADriver();
__macro(cuMemRelease); \
__macro(cuMemAddressFree)
CUDA_ROUTINE_EACH_VVM(DECLARE_DYNAMIC_LOAD_CUDA_WRAP);
CUDA_ROUTINE_EACH_VVM(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDA_WRAP);
#endif
CUDA_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDA_WRAP);
CUDA_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDA_WRAP);
#undef DECLARE_DYNAMIC_LOAD_CUDA_WRAP
#undef PLATFORM_DECLARE_DYNAMIC_LOAD_CUDA_WRAP
} // namespace dynload
} // namespace platform
......
......@@ -13,13 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/pten/backends/dynload/cudnn.h"
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag cudnn_dso_flag;
void* cudnn_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
......@@ -45,19 +43,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DEFINE_WRAP);
CUDNN_DNN_ROUTINE_EACH_R8(DEFINE_WRAP);
#endif
bool HasCUDNN() {
std::call_once(cudnn_dso_flag,
[]() { cudnn_dso_handle = GetCUDNNDsoHandle(); });
return cudnn_dso_handle != nullptr;
}
void EnforceCUDNNLoaded(const char* fn_name) {
PADDLE_ENFORCE_NOT_NULL(
cudnn_dso_handle,
platform::errors::PreconditionNotMet(
"Cannot load cudnn shared library. Cannot invoke method %s.",
fn_name));
}
bool HasCUDNN() { return pten::dynload::HasCUDNN(); }
} // namespace dynload
} // namespace platform
......
......@@ -18,32 +18,17 @@ limitations under the License. */
#include <glog/logging.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/cudnn.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag cudnn_dso_flag;
extern void* cudnn_dso_handle;
extern bool HasCUDNN();
extern void EnforceCUDNNLoaded(const char* fn_name);
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cudnn_func = decltype(&::__name); \
std::call_once(cudnn_dso_flag, []() { \
cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \
}); \
EnforceCUDNNLoaded(#__name); \
static void* p_##__name = dlsym(cudnn_dso_handle, #__name); \
return reinterpret_cast<cudnn_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
#define PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
/**
* include all needed cudnn functions in HPPL
......@@ -127,7 +112,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
__macro(cudnnGetActivationDescriptor); \
__macro(cudnnDestroyActivationDescriptor); \
__macro(cudnnSetRNNDescriptor_v6);
CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
CUDNN_DNN_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#if CUDNN_VERSION >= 7000 && CUDNN_VERSION < 8000
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(__macro) \
......@@ -135,7 +120,8 @@ CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
__macro(cudnnGetConvolutionForwardAlgorithm); \
__macro(cudnnGetConvolutionBackwardDataAlgorithm); \
__macro(cudnnSetRNNDescriptor);
CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(
PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif
#if CUDNN_VERSION >= 7001
......@@ -153,7 +139,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
__macro(cudnnGetConvolutionBackwardFilterAlgorithm_v7); \
__macro(cudnnGetConvolutionForwardAlgorithm_v7); \
__macro(cudnnGetConvolutionBackwardFilterAlgorithmMaxCount);
CUDNN_DNN_ROUTINE_EACH_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
CUDNN_DNN_ROUTINE_EACH_R7(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif
#if CUDNN_VERSION >= 7201
......@@ -166,7 +152,7 @@ CUDNN_DNN_ROUTINE_EACH_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
__macro(cudnnRNNBackwardDataEx); \
__macro(cudnnRNNBackwardWeightsEx); \
__macro(cudnnRNNForwardInferenceEx);
CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif
#if CUDNN_VERSION >= 7401
......@@ -176,7 +162,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
__macro(cudnnGetBatchNormalizationBackwardExWorkspaceSize); \
__macro(cudnnBatchNormalizationBackwardEx); \
__macro(cudnnGetBatchNormalizationTrainingExReserveSpaceSize);
CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
CUDNN_DNN_ROUTINE_EACH_AFTER_R7(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif
#if CUDNN_VERSION >= 8000
......@@ -192,7 +178,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
__macro(cudnnSetFusedOpsConstParamPackAttribute); \
__macro(cudnnSetFusedOpsVariantParamPackAttribute); \
__macro(cudnnMakeFusedOpsPlan);
CUDNN_DNN_ROUTINE_EACH_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
CUDNN_DNN_ROUTINE_EACH_R8(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif
} // namespace dynload
......
......@@ -13,31 +13,17 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/dynload/cufft.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/pten/backends/dynload/cufft.h"
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag cufft_dso_flag;
void* cufft_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CUFFT_FFT_ROUTINE_EACH(DEFINE_WRAP);
bool HasCUFFT() {
std::call_once(cufft_dso_flag,
[]() { cufft_dso_handle = GetCUFFTDsoHandle(); });
return cufft_dso_handle != nullptr;
}
void EnforceCUFFTLoaded(const char* fn_name) {
PADDLE_ENFORCE_NOT_NULL(
cufft_dso_handle,
platform::errors::PreconditionNotMet(
"Cannot load cufft shared library. Cannot invoke method %s.",
fn_name));
}
bool HasCUFFT() { return pten::dynload::HasCUFFT(); }
} // namespace dynload
} // namespace platform
......
......@@ -19,32 +19,17 @@ limitations under the License. */
#include <glog/logging.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/cufft.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag cufft_dso_flag;
extern void* cufft_dso_handle;
extern bool HasCUFFT();
extern void EnforceCUFFTLoaded(const char* fn_name);
#define DECLARE_DYNAMIC_LOAD_CUFFT_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cufft_func = decltype(&::__name); \
std::call_once(cufft_dso_flag, []() { \
cufft_dso_handle = paddle::platform::dynload::GetCUFFTDsoHandle(); \
}); \
EnforceCUFFTLoaded(#__name); \
static void* p_##__name = dlsym(cufft_dso_handle, #__name); \
return reinterpret_cast<cufft_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
#define PLATFORM_DECLARE_DYNAMIC_LOAD_CUFFT_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
/**
* include all needed cufft functions in HPPL
......@@ -104,7 +89,7 @@ extern void EnforceCUFFTLoaded(const char* fn_name);
__macro(cufftXtExecDescriptor); \
__macro(cufftXtSetWorkAreaPolicy);
CUFFT_FFT_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUFFT_WRAP)
CUFFT_FFT_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUFFT_WRAP)
} // namespace dynload
} // namespace platform
......
......@@ -20,9 +20,6 @@ namespace paddle {
namespace platform {
namespace dynload {
std::once_flag cupti_dso_flag;
void *cupti_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CUPTI_ROUTINE_EACH(DEFINE_WRAP);
......
......@@ -19,16 +19,12 @@ limitations under the License. */
#include <cupti.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/cupti.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag cupti_dso_flag;
extern void *cupti_dso_handle;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load cupti routine
......@@ -36,18 +32,8 @@ extern void *cupti_dso_handle;
*
* note: default dynamic linked libs
*/
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
inline CUptiResult CUPTIAPI operator()(Args... args) { \
using cuptiFunc = decltype(&::__name); \
std::call_once(cupti_dso_flag, []() { \
cupti_dso_handle = paddle::platform::dynload::GetCUPTIDsoHandle(); \
}); \
static void *p_##__name = dlsym(cupti_dso_handle, #__name); \
return reinterpret_cast<cuptiFunc>(p_##__name)(args...); \
} \
}; \
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define CUPTI_ROUTINE_EACH(__macro) \
......
......@@ -18,9 +18,6 @@ namespace paddle {
namespace platform {
namespace dynload {
std::once_flag curand_dso_flag;
void *curand_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CURAND_RAND_ROUTINE_EACH(DEFINE_WRAP);
......
......@@ -16,27 +16,14 @@ limitations under the License. */
#include <curand.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/curand.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag curand_dso_flag;
extern void *curand_dso_handle;
#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
curandStatus_t operator()(Args... args) { \
using curandFunc = decltype(&::__name); \
std::call_once(curand_dso_flag, []() { \
curand_dso_handle = paddle::platform::dynload::GetCurandDsoHandle(); \
}); \
static void *p_##__name = dlsym(curand_dso_handle, #__name); \
return reinterpret_cast<curandFunc>(p_##__name)(args...); \
} \
}; \
#define PLATFORM_DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define CURAND_RAND_ROUTINE_EACH(__macro) \
......@@ -48,7 +35,7 @@ extern void *curand_dso_handle;
__macro(curandGenerateNormal); \
__macro(curandDestroyGenerator);
CURAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP);
CURAND_RAND_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CURAND_WRAP);
} // namespace dynload
} // namespace platform
......
......@@ -18,9 +18,6 @@ namespace paddle {
namespace platform {
namespace dynload {
std::once_flag cusolver_dso_flag;
void *cusolver_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CUSOLVER_ROUTINE_EACH(DEFINE_WRAP);
......
......@@ -17,28 +17,14 @@ limitations under the License. */
#include <cusolverDn.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/cusolver.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag cusolver_dso_flag;
extern void *cusolver_dso_handle;
#define DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
cusolverStatus_t operator()(Args... args) { \
using cusolverFunc = decltype(&::__name); \
std::call_once(cusolver_dso_flag, []() { \
cusolver_dso_handle = \
paddle::platform::dynload::GetCusolverDsoHandle(); \
}); \
static void *p_##__name = dlsym(cusolver_dso_handle, #__name); \
return reinterpret_cast<cusolverFunc>(p_##__name)(args...); \
} \
}; \
#define PLATFORM_DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define CUSOLVER_ROUTINE_EACH(__macro) \
......@@ -62,7 +48,7 @@ extern void *cusolver_dso_handle;
__macro(cusolverDnCheevd); \
__macro(cusolverDnZheevd);
CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
CUSOLVER_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
#if CUDA_VERSION >= 9020
#define CUSOLVER_ROUTINE_EACH_R1(__macro) \
......@@ -105,7 +91,7 @@ CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
__macro(cusolverDnCungqr); \
__macro(cusolverDnZungqr);
CUSOLVER_ROUTINE_EACH_R1(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
CUSOLVER_ROUTINE_EACH_R1(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
#endif
#if CUDA_VERSION >= 9020
......@@ -117,10 +103,10 @@ CUSOLVER_ROUTINE_EACH_R1(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
__macro(cusolverDnDsyevj); \
__macro(cusolverDnDestroySyevjInfo);
CUSOLVER_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
CUSOLVER_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
#endif
#undef DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP
#undef PLATFORM_DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP
} // namespace dynload
} // namespace platform
} // namespace paddle
......@@ -18,9 +18,6 @@ namespace paddle {
namespace platform {
namespace dynload {
std::once_flag cusparse_dso_flag;
void *cusparse_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
#ifdef CUSPARSE_ROUTINE_EACH
......
......@@ -17,28 +17,14 @@ limitations under the License. */
#include <cusparse.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/cusparse.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag cusparse_dso_flag;
extern void *cusparse_dso_handle;
#define DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
cusparseStatus_t operator()(Args... args) { \
using cusparseFunc = decltype(&::__name); \
std::call_once(cusparse_dso_flag, []() { \
cusparse_dso_handle = \
paddle::platform::dynload::GetCusparseDsoHandle(); \
}); \
static void *p_##__name = dlsym(cusparse_dso_handle, #__name); \
return reinterpret_cast<cusparseFunc>(p_##__name)(args...); \
} \
}; \
#define PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#if defined(PADDLE_WITH_CUDA)
......@@ -54,7 +40,7 @@ extern void *cusparse_dso_handle;
__macro(cusparseSetMatType); \
__macro(cusparseSetMatIndexBase);
CUSPARSE_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP);
CUSPARSE_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP);
// APIs available after CUDA 11.2
#if CUDA_VERSION >= 11020
......@@ -74,7 +60,7 @@ CUSPARSE_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP);
__macro(cusparseSparseToDense_bufferSize); \
__macro(cusparseSparseToDense);
CUSPARSE_ROUTINE_EACH_11020(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
CUSPARSE_ROUTINE_EACH_11020(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
// APIs available after CUDA 11.3
#if CUDA_VERSION >= 11030
......@@ -83,13 +69,13 @@ CUSPARSE_ROUTINE_EACH_11020(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
__macro(cusparseSDDMM_preprocess); \
__macro(cusparseSDDMM);
CUSPARSE_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
CUSPARSE_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
#endif
#endif
#endif
#endif
#undef DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP
#undef PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP
} // namespace dynload
} // namespace platform
} // namespace paddle
......@@ -18,9 +18,6 @@ namespace paddle {
namespace platform {
namespace dynload {
std::once_flag hipfft_dso_flag;
void *hipfft_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
HIPFFT_FFT_ROUTINE_EACH(DEFINE_WRAP);
......
......@@ -17,8 +17,7 @@ limitations under the License. */
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/hipfft.h"
namespace paddle {
namespace platform {
......@@ -26,18 +25,8 @@ namespace dynload {
extern std::once_flag hipfft_dso_flag;
extern void *hipfft_dso_handle;
#define DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using hipfftFunc = decltype(&::__name); \
std::call_once(hipfft_dso_flag, []() { \
hipfft_dso_handle = paddle::platform::dynload::GetROCFFTDsoHandle(); \
}); \
static void *p_##__name = dlsym(hipfft_dso_handle, #__name); \
return reinterpret_cast<hipfftFunc>(p_##__name)(args...); \
} \
}; \
#define PLATFORM_DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define HIPFFT_FFT_ROUTINE_EACH(__macro) \
......@@ -70,53 +59,8 @@ extern void *hipfft_dso_handle;
__macro(hipfftGetVersion); \
__macro(hipfftGetProperty);
HIPFFT_FFT_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP);
HIPFFT_FFT_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP);
inline const char *hipfftGetErrorString(hipfftResult_t status) {
switch (status) {
case HIPFFT_SUCCESS:
return "'HIPFFT_SUCCESS'. The hipFFT operation was successful.";
case HIPFFT_INVALID_PLAN:
return "'HIPFFT_INVALID_PLAN'. hipFFT was passed an invalid plan handle.";
case HIPFFT_ALLOC_FAILED:
return "'HIPFFT_ALLOC_FAILED'. hipFFT failed to allocate GPU or CPU "
"memory.";
case HIPFFT_INVALID_TYPE:
return "'HIPFFT_INVALID_TYPE'. No longer used.";
case HIPFFT_INVALID_VALUE:
return "'HIPFFT_INVALID_VALUE'. User specified an invalid pointer or "
"parameter.";
case HIPFFT_INTERNAL_ERROR:
return "'HIPFFT_INTERNAL_ERROR'. Driver or internal hipFFT library "
"error.";
case HIPFFT_EXEC_FAILED:
return "'HIPFFT_EXEC_FAILED'. Failed to execute an FFT on the GPU.";
case HIPFFT_SETUP_FAILED:
return "'HIPFFT_SETUP_FAILED'. The hipFFT library failed to initialize.";
case HIPFFT_INVALID_SIZE:
return "'HIPFFT_INVALID_SIZE'. User specified an invalid transform size.";
case HIPFFT_UNALIGNED_DATA:
return "'HIPFFT_UNALIGNED_DATA'. No longer used.";
case HIPFFT_INCOMPLETE_PARAMETER_LIST:
return "'HIPFFT_INCOMPLETE_PARAMETER_LIST'. Missing parameters in call.";
case HIPFFT_INVALID_DEVICE:
return "'HIPFFT_INVALID_DEVICE'. Execution of a plan was on different "
"GPU than plan creation.";
case HIPFFT_PARSE_ERROR:
return "'HIPFFT_PARSE_ERROR'. Internal plan database error.";
case HIPFFT_NO_WORKSPACE:
return "'HIPFFT_NO_WORKSPACE'. No workspace has been provided prior to "
"plan execution.";
case HIPFFT_NOT_IMPLEMENTED:
return "'HIPFFT_NOT_IMPLEMENTED'. Function does not implement "
"functionality for parameters given.";
case HIPFFT_NOT_SUPPORTED:
return "'HIPFFT_NOT_SUPPORTED'. Operation is not supported for "
"parameters given.";
default:
return "HIPFFT_STATUS_UNKNOWN_ERROR";
}
}
} // namespace dynload
} // namespace platform
} // namespace paddle
......
......@@ -18,9 +18,6 @@ namespace paddle {
namespace platform {
namespace dynload {
std::once_flag hiprand_dso_flag;
void *hiprand_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
HIPRAND_RAND_ROUTINE_EACH(DEFINE_WRAP);
......
......@@ -16,28 +16,15 @@ limitations under the License. */
#include <hiprand.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/hiprand.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag hiprand_dso_flag;
extern void *hiprand_dso_handle;
#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
hiprandStatus_t operator()(Args... args) { \
using hiprandFunc = decltype(&::__name); \
std::call_once(hiprand_dso_flag, []() { \
hiprand_dso_handle = paddle::platform::dynload::GetCurandDsoHandle(); \
}); \
static void *p_##__name = dlsym(hiprand_dso_handle, #__name); \
return reinterpret_cast<hiprandFunc>(p_##__name)(args...); \
} \
}; \
#define PLATFORM_DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define HIPRAND_RAND_ROUTINE_EACH(__macro) \
......@@ -49,7 +36,7 @@ extern void *hiprand_dso_handle;
__macro(hiprandGenerateNormal); \
__macro(hiprandDestroyGenerator);
HIPRAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP);
HIPRAND_RAND_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CURAND_WRAP);
} // namespace dynload
} // namespace platform
......
......@@ -13,23 +13,17 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/dynload/hiprtc.h"
#include "paddle/pten/backends/dynload/hiprtc.h"
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag hiprtc_dso_flag;
void* hiprtc_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
HIPRTC_ROUTINE_EACH(DEFINE_WRAP);
bool HasNVRTC() {
std::call_once(hiprtc_dso_flag,
[]() { hiprtc_dso_handle = GetNVRTCDsoHandle(); });
return hiprtc_dso_handle != nullptr;
}
bool HasNVRTC() { return pten::dynload::HasNVRTC(); }
} // namespace dynload
} // namespace platform
......
......@@ -16,30 +16,17 @@ limitations under the License. */
#include <hip/hiprtc.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/hiprtc.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag hiprtc_dso_flag;
extern void* hiprtc_dso_handle;
extern bool HasNVRTC();
#define DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using hiprtc_func = decltype(&::__name); \
std::call_once(hiprtc_dso_flag, []() { \
hiprtc_dso_handle = paddle::platform::dynload::GetNVRTCDsoHandle(); \
}); \
static void* p_##__name = dlsym(hiprtc_dso_handle, #__name); \
return reinterpret_cast<hiprtc_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
#define PLATFORM_DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
/**
* include all needed hiprtc functions
......@@ -55,9 +42,9 @@ extern bool HasNVRTC();
__macro(hiprtcGetProgramLog); \
__macro(hiprtcGetProgramLogSize)
HIPRTC_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP);
HIPRTC_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP);
#undef DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP
#undef PLATFORM_DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP
} // namespace dynload
} // namespace platform
......
......@@ -13,15 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/dynload/lapack.h"
#include <mutex>
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag lapack_dso_flag;
void* lapack_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
LAPACK_ROUTINE_EACH(DEFINE_WRAP);
......
......@@ -16,122 +16,20 @@ limitations under the License. */
#include <complex>
#include <mutex>
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
// Note(zhouwei): because lapack doesn't provide appropriate header file.
// should expose API statement yourself.
// getrf_(For example)
extern "C" void dgetrf_(int *m, int *n, double *a, int *lda, int *ipiv,
int *info);
extern "C" void sgetrf_(int *m, int *n, float *a, int *lda, int *ipiv,
int *info);
// evd
extern "C" void zheevd_(char *jobz, char *uplo, int *n, std::complex<double> *a,
int *lda, double *w, std::complex<double> *work,
int *lwork, double *rwork, int *lrwork, int *iwork,
int *liwork, int *info);
extern "C" void cheevd_(char *jobz, char *uplo, int *n, std::complex<float> *a,
int *lda, float *w, std::complex<float> *work,
int *lwork, float *rwork, int *lrwork, int *iwork,
int *liwork, int *info);
extern "C" void dsyevd_(char *jobz, char *uplo, int *n, double *a, int *lda,
double *w, double *work, int *lwork, int *iwork,
int *liwork, int *info);
extern "C" void ssyevd_(char *jobz, char *uplo, int *n, float *a, int *lda,
float *w, float *work, int *lwork, int *iwork,
int *liwork, int *info);
// geev
extern "C" void dgeev_(char *jobvl, char *jobvr, int *n, double *a, int *lda,
double *wr, double *wi, double *vl, int *ldvl,
double *vr, int *ldvr, double *work, int *lwork,
int *info);
extern "C" void sgeev_(char *jobvl, char *jobvr, int *n, float *a, int *lda,
float *wr, float *wi, float *vl, int *ldvl, float *vr,
int *ldvr, float *work, int *lwork, int *info);
extern "C" void zgeev_(char *jobvl, char *jobvr, int *n,
std::complex<double> *a, int *lda,
std::complex<double> *w, std::complex<double> *vl,
int *ldvl, std::complex<double> *vr, int *ldvr,
std::complex<double> *work, int *lwork, double *rwork,
int *info);
extern "C" void cgeev_(char *jobvl, char *jobvr, int *n, std::complex<float> *a,
int *lda, std::complex<float> *w,
std::complex<float> *vl, int *ldvl,
std::complex<float> *vr, int *ldvr,
std::complex<float> *work, int *lwork, float *rwork,
int *info);
// gels
extern "C" void dgels_(char *trans, int *m, int *n, int *nrhs, double *a,
int *lda, double *b, int *ldb, double *work, int *lwork,
int *info);
extern "C" void sgels_(char *trans, int *m, int *n, int *nrhs, float *a,
int *lda, float *b, int *ldb, float *work, int *lwork,
int *info);
// gelsd
extern "C" void dgelsd_(int *m, int *n, int *nrhs, double *a, int *lda,
double *b, int *ldb, double *s, double *rcond,
int *rank, double *work, int *lwork, int *iwork,
int *info);
extern "C" void sgelsd_(int *m, int *n, int *nrhs, float *a, int *lda, float *b,
int *ldb, float *s, float *rcond, int *rank,
float *work, int *lwork, int *iwork, int *info);
// gelsy
extern "C" void dgelsy_(int *m, int *n, int *nrhs, double *a, int *lda,
double *b, int *ldb, int *jpvt, double *rcond,
int *rank, double *work, int *lwork, int *info);
extern "C" void sgelsy_(int *m, int *n, int *nrhs, float *a, int *lda, float *b,
int *ldb, int *jpvt, float *rcond, int *rank,
float *work, int *lwork, int *info);
// gelss
extern "C" void dgelss_(int *m, int *n, int *nrhs, double *a, int *lda,
double *b, int *ldb, double *s, double *rcond,
int *rank, double *work, int *lwork, int *info);
extern "C" void sgelss_(int *m, int *n, int *nrhs, float *a, int *lda, float *b,
int *ldb, float *s, float *rcond, int *rank,
float *work, int *lwork, int *info);
extern "C" void zpotrs_(char *uplo, int *n, int *nrhs, std::complex<double> *a,
int *lda, std::complex<double> *b, int *ldb, int *info);
extern "C" void cpotrs_(char *uplo, int *n, int *nrhs, std::complex<float> *a,
int *lda, std::complex<float> *b, int *ldb, int *info);
extern "C" void dpotrs_(char *uplo, int *n, int *nrhs, double *a, int *lda,
double *b, int *ldb, int *info);
extern "C" void spotrs_(char *uplo, int *n, int *nrhs, float *a, int *lda,
float *b, int *ldb, int *info);
#include "paddle/pten/backends/dynload/lapack.h"
#include "paddle/pten/common/complex.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag lapack_dso_flag;
extern void *lapack_dso_handle;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load lapack routine
* via operator overloading.
*/
#define DYNAMIC_LOAD_LAPACK_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using lapackFunc = decltype(&::__name); \
std::call_once(lapack_dso_flag, []() { \
lapack_dso_handle = paddle::platform::dynload::GetLAPACKDsoHandle(); \
}); \
static void *p_##_name = dlsym(lapack_dso_handle, #__name); \
return reinterpret_cast<lapackFunc>(p_##_name)(args...); \
} \
}; \
#define DYNAMIC_LOAD_LAPACK_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define DECLARE_DYNAMIC_LOAD_LAPACK_WRAP(__name) \
......
......@@ -13,13 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/dynload/miopen.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/pten/backends/dynload/cudnn.h"
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag miopen_dso_flag;
void* miopen_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
......@@ -50,19 +48,7 @@ MIOPEN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP);
MIOPEN_DNN_ROUTINE_EACH_AFTER_R7(DEFINE_WRAP);
#endif
bool HasCUDNN() {
std::call_once(miopen_dso_flag,
[]() { miopen_dso_handle = GetCUDNNDsoHandle(); });
return miopen_dso_handle != nullptr;
}
void EnforceCUDNNLoaded(const char* fn_name) {
PADDLE_ENFORCE_NOT_NULL(
miopen_dso_handle,
platform::errors::PreconditionNotMet(
"Cannot load miopen shared library. Cannot invoke method %s.",
fn_name));
}
bool HasCUDNN() { return pten::dynload::HasCUDNN(); }
} // namespace dynload
} // namespace platform
......
......@@ -18,66 +18,17 @@ limitations under the License. */
#include <miopen/miopen.h>
#include <miopen/version.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#define MIOPEN_VERSION \
(MIOPEN_VERSION_MAJOR * 1000 + MIOPEN_VERSION_MINOR * 10 + \
MIOPEN_VERSION_PATCH) // NOLINT
// MIOPEN only support NCHW, just for compatibility with CUDNN API
typedef enum {
MIOPEN_TENSOR_NCHW = 0,
MIOPEN_TENSOR_NHWC = 1,
} miopenTensorFormat_t;
#include "paddle/pten/backends/dynload/miopen.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag miopen_dso_flag;
extern void* miopen_dso_handle;
extern bool HasCUDNN();
inline const char* miopenGetErrorString(miopenStatus_t status) {
switch (status) {
case miopenStatusSuccess:
return "MIOPEN_STATUS_SUCCESS";
case miopenStatusNotInitialized:
return "MIOPEN_STATUS_NOT_INITIALIZED";
case miopenStatusInvalidValue:
return "MIOPEN_STATUS_INVALID_VALUE";
case miopenStatusBadParm:
return "MIOPEN_STATUS_BAD_PARAM";
case miopenStatusAllocFailed:
return "MIOPEN_STATUS_ALLOC_FAILED";
case miopenStatusInternalError:
return "MIOPEN_STATUS_INTERNAL_ERROR";
case miopenStatusNotImplemented:
return "MIOPEN_STATUS_NOT_IMPLEMENTED";
case miopenStatusUnsupportedOp:
return "MIOPEN_STATUS_UNSUPPORTED_OP";
case miopenStatusUnknownError:
default:
return "MIOPEN_STATUS_UNKNOWN_ERROR";
}
}
extern void EnforceCUDNNLoaded(const char* fn_name);
#define DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using miopen_func = decltype(&::__name); \
std::call_once(miopen_dso_flag, []() { \
miopen_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \
}); \
EnforceCUDNNLoaded(#__name); \
static void* p_##__name = dlsym(miopen_dso_handle, #__name); \
return reinterpret_cast<miopen_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
#define PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
/**
* include all needed miopen functions in HPPL
......@@ -145,23 +96,23 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
__macro(miopenRNNForwardInference); \
__macro(miopenGetTensorNumBytes);
MIOPEN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
MIOPEN_DNN_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
#define MIOPEN_DNN_ROUTINE_EACH_R2(__macro) \
__macro(miopenConvolutionBackwardData);
MIOPEN_DNN_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
MIOPEN_DNN_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
// APIs available after R3:
#define MIOPEN_DNN_ROUTINE_EACH_AFTER_R3(__macro) \
__macro(miopenConvolutionBackwardWeightsGetWorkSpaceSize);
MIOPEN_DNN_ROUTINE_EACH_AFTER_R3(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
MIOPEN_DNN_ROUTINE_EACH_AFTER_R3(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
// APIs available after R4:
#define MIOPEN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \
__macro(miopenBatchNormalizationForwardTraining); \
__macro(miopenBatchNormalizationForwardInference); \
__macro(miopenBatchNormalizationBackward);
MIOPEN_DNN_ROUTINE_EACH_AFTER_R4(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
MIOPEN_DNN_ROUTINE_EACH_AFTER_R4(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
// APIs in R5
#define MIOPEN_DNN_ROUTINE_EACH_R5(__macro) \
......@@ -169,12 +120,12 @@ MIOPEN_DNN_ROUTINE_EACH_AFTER_R4(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
__macro(miopenSetActivationDescriptor); \
__macro(miopenGetActivationDescriptor); \
__macro(miopenDestroyActivationDescriptor);
MIOPEN_DNN_ROUTINE_EACH_R5(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
MIOPEN_DNN_ROUTINE_EACH_R5(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
// APIs in R6
#define MIOPEN_DNN_ROUTINE_EACH_R6(__macro) \
/*__macro(miopenSetRNNDescriptor_v6);*/
MIOPEN_DNN_ROUTINE_EACH_R6(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
MIOPEN_DNN_ROUTINE_EACH_R6(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
#define MIOPEN_DNN_ROUTINE_EACH_R7(__macro) \
__macro(miopenSetConvolutionGroupCount); \
......@@ -184,7 +135,7 @@ MIOPEN_DNN_ROUTINE_EACH_R6(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
__macro(miopenSetCTCLossDescriptor); \
__macro(miopenGetCTCLossWorkspaceSize); \
__macro(miopenCTCLoss);
MIOPEN_DNN_ROUTINE_EACH_R7(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
MIOPEN_DNN_ROUTINE_EACH_R7(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
#define MIOPEN_DNN_ROUTINE_EACH_AFTER_R7(__macro) \
/*__macro(cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize); \
......@@ -192,7 +143,7 @@ __macro(cudnnBatchNormalizationForwardTrainingEx); \
__macro(cudnnGetBatchNormalizationBackwardExWorkspaceSize); \
__macro(cudnnBatchNormalizationBackwardEx); \
__macro(cudnnGetBatchNormalizationTrainingExReserveSpaceSize);*/
MIOPEN_DNN_ROUTINE_EACH_AFTER_R7(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
MIOPEN_DNN_ROUTINE_EACH_AFTER_R7(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
} // namespace dynload
} // namespace platform
} // namespace paddle
......@@ -18,9 +18,6 @@ namespace paddle {
namespace platform {
namespace dynload {
std::once_flag mklml_dso_flag;
void* mklml_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
MKLML_ROUTINE_EACH(DEFINE_WRAP);
......
......@@ -17,36 +17,23 @@ limitations under the License. */
#include <mkl.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/mklml.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag mklml_dso_flag;
extern void *mklml_dso_handle;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load mklml routine
* via operator overloading.
*/
#define DYNAMIC_LOAD_MKLML_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using mklmlFunc = decltype(&::__name); \
std::call_once(mklml_dso_flag, []() { \
mklml_dso_handle = paddle::platform::dynload::GetMKLMLDsoHandle(); \
}); \
static void *p_##_name = dlsym(mklml_dso_handle, #__name); \
return reinterpret_cast<mklmlFunc>(p_##_name)(args...); \
} \
}; \
#define DYNAMIC_LOAD_MKLML_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define DECLARE_DYNAMIC_LOAD_MKLML_WRAP(__name) DYNAMIC_LOAD_MKLML_WRAP(__name)
#define PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP(__name) \
DYNAMIC_LOAD_MKLML_WRAP(__name)
#define MKLML_ROUTINE_EACH(__macro) \
__macro(cblas_sgemm); \
......@@ -111,7 +98,7 @@ extern void *mklml_dso_handle;
__macro(MKL_Set_Num_Threads); \
__macro(MKL_Get_Max_Threads);
MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
MKLML_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
#if !defined(_WIN32)
DYNAMIC_LOAD_MKLML_WRAP(mkl_scsrmm);
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/port.h"
namespace paddle {
namespace platform {
......@@ -32,18 +32,8 @@ extern void* mklrt_dso_handle;
* (for each function) to dynamic load mkldfti routine
* via operator overloading.
*/
#define DYNAMIC_LOAD_MKLRT_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using mklrtFunc = decltype(&::__name); \
std::call_once(mklrt_dso_flag, []() { \
mklrt_dso_handle = paddle::platform::dynload::GetMKLRTDsoHandle(); \
}); \
static void* p_##__name = dlsym(mklrt_dso_handle, #__name); \
return reinterpret_cast<mklrtFunc>(p_##__name)(args...); \
} \
}; \
#define DYNAMIC_LOAD_MKLRT_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
// mkl_dfti.h has a macro that shadows the function with the same name
......
......@@ -18,9 +18,6 @@ namespace paddle {
namespace platform {
namespace dynload {
std::once_flag nccl_dso_flag;
void *nccl_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP);
......
......@@ -16,28 +16,14 @@ limitations under the License. */
#include <nccl.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/nccl.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag nccl_dso_flag;
extern void* nccl_dso_handle;
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> decltype(__name(args...)) { \
using nccl_func = decltype(&::__name); \
std::call_once(nccl_dso_flag, []() { \
nccl_dso_handle = paddle::platform::dynload::GetNCCLDsoHandle(); \
}); \
static void* p_##__name = dlsym(nccl_dso_handle, #__name); \
return reinterpret_cast<nccl_func>(p_##__name)(args...); \
} \
}; \
#define PLATFORM_DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define NCCL_RAND_ROUTINE_EACH(__macro) \
......@@ -57,30 +43,30 @@ extern void* nccl_dso_handle;
__macro(ncclReduceScatter); \
__macro(ncclGetErrorString);
NCCL_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
NCCL_RAND_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
#if NCCL_VERSION_CODE >= 2212
#define NCCL_RAND_ROUTINE_EACH_AFTER_2212(__macro) __macro(ncclBroadcast);
NCCL_RAND_ROUTINE_EACH_AFTER_2212(DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
NCCL_RAND_ROUTINE_EACH_AFTER_2212(PLATFORM_DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
#endif
#if NCCL_VERSION_CODE >= 2304
#define NCCL_RAND_ROUTINE_EACH_AFTER_2304(__macro) __macro(ncclGetVersion);
NCCL_RAND_ROUTINE_EACH_AFTER_2304(DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
NCCL_RAND_ROUTINE_EACH_AFTER_2304(PLATFORM_DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
#endif
#if NCCL_VERSION_CODE >= 2703
#define NCCL_RAND_ROUTINE_EACH_AFTER_2703(__macro) \
__macro(ncclSend); \
__macro(ncclRecv);
NCCL_RAND_ROUTINE_EACH_AFTER_2703(DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
NCCL_RAND_ROUTINE_EACH_AFTER_2703(PLATFORM_DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
#endif
#if NCCL_VERSION_CODE >= 21100
#define NCCL_RAND_ROUTINE_EACH_AFTER_21100(__macro) \
__macro(ncclRedOpCreatePreMulSum); \
__macro(ncclRedOpDestroy);
NCCL_RAND_ROUTINE_EACH_AFTER_21100(DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
NCCL_RAND_ROUTINE_EACH_AFTER_21100(PLATFORM_DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
#endif
} // namespace dynload
......
......@@ -15,9 +15,6 @@ namespace paddle {
namespace platform {
namespace dynload {
std::once_flag nvjpeg_dso_flag;
void *nvjpeg_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
NVJPEG_RAND_ROUTINE_EACH(DEFINE_WRAP);
......
......@@ -14,27 +14,14 @@ limitations under the License. */
#include <nvjpeg.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/nvjpeg.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag nvjpeg_dso_flag;
extern void *nvjpeg_dso_handle;
#define DECLARE_DYNAMIC_LOAD_NVJPEG_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
nvjpegStatus_t operator()(Args... args) { \
using nvjpegFunc = decltype(&::__name); \
std::call_once(nvjpeg_dso_flag, []() { \
nvjpeg_dso_handle = paddle::platform::dynload::GetNvjpegDsoHandle(); \
}); \
static void *p_##__name = dlsym(nvjpeg_dso_handle, #__name); \
return reinterpret_cast<nvjpegFunc>(p_##__name)(args...); \
} \
}; \
#define PLATFORM_DECLARE_DYNAMIC_LOAD_NVJPEG_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define NVJPEG_RAND_ROUTINE_EACH(__macro) \
......@@ -44,7 +31,7 @@ extern void *nvjpeg_dso_handle;
__macro(nvjpegJpegStateDestroy); \
__macro(nvjpegDecode);
NVJPEG_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NVJPEG_WRAP);
NVJPEG_RAND_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_NVJPEG_WRAP);
} // namespace dynload
} // namespace platform
......
......@@ -13,23 +13,17 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/dynload/nvrtc.h"
#include "paddle/pten/backends/dynload/nvrtc.h"
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag nvrtc_dso_flag;
void* nvrtc_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
NVRTC_ROUTINE_EACH(DEFINE_WRAP);
bool HasNVRTC() {
std::call_once(nvrtc_dso_flag,
[]() { nvrtc_dso_handle = GetNVRTCDsoHandle(); });
return nvrtc_dso_handle != nullptr;
}
bool HasNVRTC() { return pten::dynload::HasNVRTC(); }
} // namespace dynload
} // namespace platform
......
......@@ -17,30 +17,17 @@ limitations under the License. */
#include <nvrtc.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/nvrtc.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag nvrtc_dso_flag;
extern void* nvrtc_dso_handle;
extern bool HasNVRTC();
#define DECLARE_DYNAMIC_LOAD_NVRTC_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using nvrtc_func = decltype(&::__name); \
std::call_once(nvrtc_dso_flag, []() { \
nvrtc_dso_handle = paddle::platform::dynload::GetNVRTCDsoHandle(); \
}); \
static void* p_##__name = dlsym(nvrtc_dso_handle, #__name); \
return reinterpret_cast<nvrtc_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
#define PLATFORM_DECLARE_DYNAMIC_LOAD_NVRTC_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
/**
* include all needed nvrtc functions
......@@ -56,9 +43,9 @@ extern bool HasNVRTC();
__macro(nvrtcGetProgramLog); \
__macro(nvrtcGetProgramLogSize)
NVRTC_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NVRTC_WRAP);
NVRTC_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_NVRTC_WRAP);
#undef DECLARE_DYNAMIC_LOAD_NVRTC_WRAP
#undef PLATFORM_DECLARE_DYNAMIC_LOAD_NVRTC_WRAP
} // namespace dynload
} // namespace platform
......
......@@ -18,9 +18,6 @@ namespace paddle {
namespace platform {
namespace dynload {
std::once_flag nvtx_dso_flag;
void *nvtx_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
NVTX_ROUTINE_EACH(DEFINE_WRAP);
......
......@@ -17,36 +17,23 @@ limitations under the License. */
#include <nvToolsExt.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/nvtx.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag nvtx_dso_flag;
extern void *nvtx_dso_handle;
#define DECLARE_DYNAMIC_LOAD_NVTX_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
int operator()(Args... args) { \
using nvtxFunc = decltype(&::__name); \
std::call_once(nvtx_dso_flag, []() { \
nvtx_dso_handle = paddle::platform::dynload::GetNvtxDsoHandle(); \
}); \
static void *p_##__name = dlsym(nvtx_dso_handle, #__name); \
return reinterpret_cast<nvtxFunc>(p_##__name)(args...); \
} \
}; \
#define PLATFORM_DECLARE_DYNAMIC_LOAD_NVTX_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define NVTX_ROUTINE_EACH(__macro) \
__macro(nvtxRangePushA); \
__macro(nvtxRangePop);
NVTX_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NVTX_WRAP);
NVTX_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_NVTX_WRAP);
#undef DECLARE_DYNAMIC_LOAD_NVTX_WRAP
#undef PLATFORM_DECLARE_DYNAMIC_LOAD_NVTX_WRAP
} // namespace dynload
} // namespace platform
} // namespace paddle
......
......@@ -18,9 +18,6 @@ namespace paddle {
namespace platform {
namespace dynload {
std::once_flag rccl_dso_flag;
void *rccl_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
RCCL_RAND_ROUTINE_EACH(DEFINE_WRAP);
......
......@@ -16,28 +16,14 @@ limitations under the License. */
#include <rccl.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/rccl.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag rccl_dso_flag;
extern void* rccl_dso_handle;
#define DECLARE_DYNAMIC_LOAD_RCCL_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> decltype(__name(args...)) { \
using nccl_func = decltype(&::__name); \
std::call_once(rccl_dso_flag, []() { \
rccl_dso_handle = paddle::platform::dynload::GetNCCLDsoHandle(); \
}); \
static void* p_##__name = dlsym(rccl_dso_handle, #__name); \
return reinterpret_cast<nccl_func>(p_##__name)(args...); \
} \
}; \
#define PLATFORM_DECLARE_DYNAMIC_LOAD_RCCL_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define RCCL_RAND_ROUTINE_EACH(__macro) \
......@@ -57,18 +43,18 @@ extern void* rccl_dso_handle;
__macro(ncclReduceScatter); \
__macro(ncclGetErrorString);
RCCL_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
RCCL_RAND_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
#if NCCL_VERSION_CODE >= 2212
#define RCCL_RAND_ROUTINE_EACH_AFTER_2212(__macro) __macro(ncclBroadcast);
RCCL_RAND_ROUTINE_EACH_AFTER_2212(DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
RCCL_RAND_ROUTINE_EACH_AFTER_2212(PLATFORM_DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
#endif
#if NCCL_VERSION_CODE >= 2703
#define RCCL_RAND_ROUTINE_EACH_AFTER_2703(__macro) \
__macro(ncclSend); \
__macro(ncclRecv);
RCCL_RAND_ROUTINE_EACH_AFTER_2703(DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
RCCL_RAND_ROUTINE_EACH_AFTER_2703(PLATFORM_DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
#endif
} // namespace dynload
......
......@@ -17,8 +17,6 @@ limitations under the License. */
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag rocblas_dso_flag;
void *rocblas_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
......
......@@ -19,16 +19,12 @@ limitations under the License. */
#include <mutex> // NOLINT
#include <type_traits>
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/rocblas.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag rocblas_dso_flag;
extern void *rocblas_dso_handle;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load cublas routine
......@@ -36,18 +32,8 @@ extern void *rocblas_dso_handle;
*
* note: default dynamic linked libs
*/
#define DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
rocblas_status operator()(Args... args) { \
using rocblas_func = decltype(&::__name); \
std::call_once(rocblas_dso_flag, []() { \
rocblas_dso_handle = paddle::platform::dynload::GetCublasDsoHandle(); \
}); \
static void *p_##__name = dlsym(rocblas_dso_handle, #__name); \
return reinterpret_cast<rocblas_func>(p_##__name)(args...); \
} \
}; \
#define PLATFORM_DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define ROCBLAS_BLAS_ROUTINE_EACH(__macro) \
......@@ -83,7 +69,7 @@ extern void *rocblas_dso_handle;
__macro(rocblas_set_pointer_mode); \
__macro(rocblas_get_pointer_mode);
ROCBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
ROCBLAS_BLAS_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
// APIs available after CUDA 8.0
#define ROCBLAS_BLAS_ROUTINE_EACH_R2(__macro) \
......@@ -94,21 +80,21 @@ ROCBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
__macro(rocblas_zgemm_strided_batched); \
__macro(rocblas_hgemm_strided_batched);
ROCBLAS_BLAS_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
ROCBLAS_BLAS_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
// HIP not supported in ROCM3.5
// #define ROCBLAS_BLAS_ROUTINE_EACH_R3(__macro)
// __macro(cublasSetMathMode);
// __macro(cublasGetMathMode);
// ROCBLAS_BLAS_ROUTINE_EACH_R3(DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
// ROCBLAS_BLAS_ROUTINE_EACH_R3(PLATFORM_DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
#define ROCBLAS_BLAS_ROUTINE_EACH_R4(__macro) \
__macro(rocblas_gemm_batched_ex); \
__macro(rocblas_gemm_strided_batched_ex);
ROCBLAS_BLAS_ROUTINE_EACH_R4(DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
ROCBLAS_BLAS_ROUTINE_EACH_R4(PLATFORM_DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
#undef DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP
#undef PLATFORM_DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP
} // namespace dynload
} // namespace platform
} // namespace paddle
......@@ -13,22 +13,17 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/dynload/rocm_driver.h"
#include "paddle/pten/backends/dynload/rocm_driver.h"
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag rocm_dso_flag;
void* rocm_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
ROCM_ROUTINE_EACH(DEFINE_WRAP);
bool HasCUDADriver() {
std::call_once(rocm_dso_flag, []() { rocm_dso_handle = GetCUDADsoHandle(); });
return rocm_dso_handle != nullptr;
}
bool HasCUDADriver() { return pten::dynload::HasCUDADriver(); }
} // namespace dynload
} // namespace platform
......
......@@ -17,30 +17,17 @@ limitations under the License. */
#include <hip/hip_runtime.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/rocm_driver.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag rocm_dso_flag;
extern void* rocm_dso_handle;
extern bool HasCUDADriver();
#define DECLARE_DYNAMIC_LOAD_ROCM_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using rocm_func = decltype(&::__name); \
std::call_once(rocm_dso_flag, []() { \
rocm_dso_handle = paddle::platform::dynload::GetCUDADsoHandle(); \
}); \
static void* p_##__name = dlsym(rocm_dso_handle, #__name); \
return reinterpret_cast<rocm_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
#define PLATFORM_DECLARE_DYNAMIC_LOAD_ROCM_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
/**
* include all needed cuda driver functions
......@@ -59,9 +46,9 @@ extern bool HasCUDADriver();
__macro(hipGetDeviceCount); \
__macro(hipDevicePrimaryCtxGetState)
ROCM_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_ROCM_WRAP);
ROCM_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_ROCM_WRAP);
#undef DECLARE_DYNAMIC_LOAD_ROCM_WRAP
#undef PLATFORM_DECLARE_DYNAMIC_LOAD_ROCM_WRAP
} // namespace dynload
} // namespace platform
......
......@@ -18,9 +18,6 @@ namespace paddle {
namespace platform {
namespace dynload {
std::once_flag warpctc_dso_flag;
void* warpctc_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
WARPCTC_ROUTINE_EACH(DEFINE_WRAP);
......
......@@ -16,34 +16,19 @@ limitations under the License. */
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
#include "warpctc/include/ctc.h"
#include "paddle/pten/backends/dynload/warpctc.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag warpctc_dso_flag;
extern void* warpctc_dso_handle;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load warpctc routine
* via operator overloading.
*/
#define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using warpctcFunc = decltype(&::__name); \
std::call_once(warpctc_dso_flag, []() { \
warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle(); \
}); \
static void* p_##_name = dlsym(warpctc_dso_handle, #__name); \
return reinterpret_cast<warpctcFunc>(p_##_name)(args...); \
} \
}; \
#define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define DECLARE_DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
......
......@@ -65,30 +65,30 @@ limitations under the License. */
#include "glog/logging.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/variant.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/to_string.h"
#include "paddle/pten/backends/dynload/port.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/dynload/cublas.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/dynload/curand.h"
#include "paddle/fluid/platform/dynload/cusolver.h"
#include "paddle/pten/backends/dynload/cublas.h"
#include "paddle/pten/backends/dynload/cudnn.h"
#include "paddle/pten/backends/dynload/curand.h"
#include "paddle/pten/backends/dynload/cusolver.h"
#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
#include <error.h>
#include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/pten/backends/dynload/nccl.h"
#endif // __APPLE__
#endif // PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/dynload/hipfft.h"
#include "paddle/fluid/platform/dynload/hiprand.h"
#include "paddle/fluid/platform/dynload/miopen.h"
#include "paddle/fluid/platform/dynload/rocblas.h"
#include "paddle/pten/backends/dynload/hipfft.h"
#include "paddle/pten/backends/dynload/hiprand.h"
#include "paddle/pten/backends/dynload/miopen.h"
#include "paddle/pten/backends/dynload/rocblas.h"
#if !defined(__APPLE__) && defined(PADDLE_WITH_RCCL)
#include <error.h> // NOLINT
#include "paddle/fluid/platform/dynload/rccl.h"
#include "paddle/pten/backends/dynload/rccl.h"
#endif // __APPLE__
#endif // PADDLE_WITH_HIP
......@@ -880,7 +880,7 @@ inline bool is_error(cudnnStatus_t stat) {
inline std::string build_nvidia_error_msg(cudnnStatus_t stat) {
std::ostringstream sout;
sout << "CUDNN error(" << stat << "), "
<< platform::dynload::cudnnGetErrorString(stat) << ". "
<< pten::dynload::cudnnGetErrorString(stat) << ". "
<< GetExternalErrorMsg(stat);
return sout.str();
}
......@@ -945,7 +945,7 @@ inline bool is_error(ncclResult_t nccl_result) {
inline std::string build_nvidia_error_msg(ncclResult_t nccl_result) {
std::ostringstream sout;
sout << "NCCL error(" << nccl_result << "), "
<< platform::dynload::ncclGetErrorString(nccl_result) << ". ";
<< pten::dynload::ncclGetErrorString(nccl_result) << ". ";
if (errno == ENOSPC || errno == EAGAIN) {
std::string detail(strerror(errno));
detail += "\nPlease try one of the following solutions:";
......@@ -1090,7 +1090,7 @@ inline bool is_error(miopenStatus_t stat) {
inline std::string build_rocm_error_msg(miopenStatus_t stat) {
std::string msg(" Miopen error, ");
return msg + platform::dynload::miopenGetErrorString(stat) + " ";
return msg + pten::dynload::miopenGetErrorString(stat) + " ";
}
/***** ROCBLAS ERROR *****/
......@@ -1132,7 +1132,7 @@ inline bool is_error(ncclResult_t nccl_result) {
inline std::string build_rocm_error_msg(ncclResult_t nccl_result) {
std::string msg(" Rccl error, ");
return msg + platform::dynload::ncclGetErrorString(nccl_result) + " ";
return msg + pten::dynload::ncclGetErrorString(nccl_result) + " ";
}
#endif // not(__APPLE__) and PADDLE_WITH_NCCL
......@@ -1141,7 +1141,7 @@ inline bool is_error(hipfftResult_t stat) { return stat != HIPFFT_SUCCESS; }
inline std::string build_rocm_error_msg(hipfftResult_t stat) {
std::string msg(" HIPFFT error, ");
return msg + platform::dynload::hipfftGetErrorString(stat) + " ";
return msg + pten::dynload::hipfftGetErrorString(stat) + " ";
}
namespace details {
......
......@@ -24,6 +24,8 @@ limitations under the License. */
#include <unistd.h>
#elif defined(_MSC_VER)
#include <processthreadsapi.h>
#else
#include <unistd.h>
#endif
#include "paddle/fluid/platform/macros.h" // import DISABLE_COPY_AND_ASSIGN
......
......@@ -19,7 +19,7 @@ limitations under the License. */
#ifdef _POSIX_C_SOURCE
#include <time.h>
#endif
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/port.h"
namespace paddle {
namespace platform {
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#pragma once
#include <stdlib.h>
#include "paddle/fluid/platform/port.h"
#include "paddle/pten/backends/dynload/port.h"
#ifdef _WIN32
static unsigned sleep(unsigned seconds) {
......
add_subdirectory(dynload)
add_subdirectory(cpu)
cc_library(pten_context SRCS all_context.cc DEPS device_context)
cc_library(pten_dynamic_loader SRCS dynamic_loader.cc DEPS enforce glog gflags)
list(APPEND CUDA_SRCS cublas.cc cublasLt.cc cudnn.cc curand.cc cusolver.cc cusparse.cc nvtx.cc cufft.cc)
if (NOT WITH_NV_JETSON)
list(APPEND CUDA_SRCS nvjpeg.cc)
endif()
if (WITH_ROCM)
list(APPEND HIP_SRCS rocblas.cc miopen.cc hiprand.cc hipfft.cc)
endif()
# There is no macOS version of NCCL.
# Disable nvrtc and cuda_driver api on MacOS, and only do a early test on Linux and Windows.
if (NOT APPLE)
list(APPEND CUDA_SRCS nvrtc.cc cuda_driver.cc)
if (WITH_NCCL)
list(APPEND CUDA_SRCS nccl.cc)
endif()
if (WITH_ROCM)
list(APPEND HIP_SRCS hiprtc.cc rocm_driver.cc)
if (WITH_RCCL)
list(APPEND HIP_SRCS rccl.cc)
endif()
endif()
endif()
if (TENSORRT_FOUND)
list(APPEND CUDA_SRCS tensorrt.cc)
endif()
configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h)
if (CUPTI_FOUND)
list(APPEND CUDA_SRCS cupti.cc)
endif(CUPTI_FOUND)
if(WITH_ROCM)
hip_library(pten_dynload_cuda SRCS ${HIP_SRCS} DEPS pten_dynamic_loader)
cc_library(pten_dynload_warpctc SRCS warpctc.cc DEPS pten_dynamic_loader warpctc)
elseif (WITH_ASCEND_CL)
cc_library(pten_dynload_warpctc SRCS warpctc.cc DEPS pten_dynamic_loader warpctc npu_hccl)
else()
nv_library(pten_dynload_cuda SRCS ${CUDA_SRCS} DEPS pten_dynamic_loader)
cc_library(pten_dynload_warpctc SRCS warpctc.cc DEPS pten_dynamic_loader warpctc)
endif()
if (WITH_MKLML)
cc_library(pten_dynload_mklml SRCS mklml.cc DEPS pten_dynamic_loader mklml)
endif()
cc_library(pten_dynload_lapack SRCS lapack.cc DEPS pten_dynamic_loader)
add_dependencies(pten_dynload_lapack extern_lapack)
# TODO(TJ): add iomp, mkldnn?
if (MKL_FOUND AND WITH_ONEMKL)
message("ONEMKL INCLUDE directory is ${MKL_INCLUDE}")
cc_library(pten_dynload_mklrt SRCS mklrt.cc DEPS pten_dynamic_loader)
target_include_directories(pten_dynload_mklrt PRIVATE ${MKL_INCLUDE})
endif()
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cublas.h"
namespace pten {
namespace dynload {
std::once_flag cublas_dso_flag;
void *cublas_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CUBLAS_BLAS_ROUTINE_EACH(DEFINE_WRAP);
#ifdef CUBLAS_BLAS_ROUTINE_EACH_R2
CUBLAS_BLAS_ROUTINE_EACH_R2(DEFINE_WRAP);
#endif
#ifdef CUBLAS_BLAS_ROUTINE_EACH_R3
CUBLAS_BLAS_ROUTINE_EACH_R3(DEFINE_WRAP);
#endif
#ifdef CUBLAS_BLAS_ROUTINE_EACH_R4
CUBLAS_BLAS_ROUTINE_EACH_R4(DEFINE_WRAP);
#endif
} // namespace dynload
} // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cublasXt.h>
#include <cublas_v2.h>
#include <cuda.h>
#include <mutex> // NOLINT
#include <type_traits>
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag cublas_dso_flag;
extern void *cublas_dso_handle;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load cublas routine
* via operator overloading.
*
* note: default dynamic linked libs
*/
#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
inline auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cublas_func = \
decltype(::__name(std::declval<Args>()...)) (*)(Args...); \
std::call_once(cublas_dso_flag, []() { \
cublas_dso_handle = pten::dynload::GetCublasDsoHandle(); \
}); \
static void *p_##__name = dlsym(cublas_dso_handle, #__name); \
return reinterpret_cast<cublas_func>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name
#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \
__macro(cublasSaxpy_v2); \
__macro(cublasDaxpy_v2); \
__macro(cublasCaxpy_v2); \
__macro(cublasZaxpy_v2); \
__macro(cublasSscal_v2); \
__macro(cublasDscal_v2); \
__macro(cublasScopy_v2); \
__macro(cublasDcopy_v2); \
__macro(cublasSgemv_v2); \
__macro(cublasDgemv_v2); \
__macro(cublasCgemv_v2); \
__macro(cublasZgemv_v2); \
__macro(cublasSgemm_v2); \
__macro(cublasDgemm_v2); \
__macro(cublasCgemm_v2); \
__macro(cublasZgemm_v2); \
__macro(cublasHgemm); \
__macro(cublasSgemmEx); \
__macro(cublasSgeam); \
__macro(cublasDgeam); \
__macro(cublasStrsm_v2); \
__macro(cublasDtrsm_v2); \
__macro(cublasCtrsm_v2); \
__macro(cublasZtrsm_v2); \
__macro(cublasCreate_v2); \
__macro(cublasDestroy_v2); \
__macro(cublasSetStream_v2); \
__macro(cublasSetPointerMode_v2); \
__macro(cublasGetPointerMode_v2); \
__macro(cublasSgemmBatched); \
__macro(cublasDgemmBatched); \
__macro(cublasCgemmBatched); \
__macro(cublasZgemmBatched); \
__macro(cublasStrsmBatched); \
__macro(cublasDtrsmBatched); \
__macro(cublasCtrsmBatched); \
__macro(cublasZtrsmBatched); \
__macro(cublasSgetrfBatched); \
__macro(cublasSgetriBatched); \
__macro(cublasDgetrfBatched); \
__macro(cublasDgetriBatched); \
__macro(cublasSmatinvBatched); \
__macro(cublasDmatinvBatched); \
__macro(cublasSgetrsBatched); \
__macro(cublasDgetrsBatched);
CUBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
// APIs available after CUDA 8.0
#if CUDA_VERSION >= 8000
#define CUBLAS_BLAS_ROUTINE_EACH_R2(__macro) \
__macro(cublasGemmEx); \
__macro(cublasSgemmStridedBatched); \
__macro(cublasDgemmStridedBatched); \
__macro(cublasCgemmStridedBatched); \
__macro(cublasZgemmStridedBatched); \
__macro(cublasHgemmStridedBatched);
CUBLAS_BLAS_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
#endif
// APIs available after CUDA 9.0
#if CUDA_VERSION >= 9000
#define CUBLAS_BLAS_ROUTINE_EACH_R3(__macro) \
__macro(cublasSetMathMode); \
__macro(cublasGetMathMode);
CUBLAS_BLAS_ROUTINE_EACH_R3(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
#endif
// APIs available after CUDA 9.1
#if CUDA_VERSION >= 9010
#define CUBLAS_BLAS_ROUTINE_EACH_R4(__macro) \
__macro(cublasGemmBatchedEx); \
__macro(cublasGemmStridedBatchedEx);
CUBLAS_BLAS_ROUTINE_EACH_R4(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
#endif
#undef DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP
} // namespace dynload
} // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cublasLt.h"
namespace pten {
namespace dynload {
std::once_flag cublasLt_dso_flag;
void *cublasLt_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CUBLASLT_BLAS_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cublasLt.h>
#include <cuda.h>
#include <mutex> // NOLINT
#include <type_traits>
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag cublasLt_dso_flag;
extern void *cublasLt_dso_handle;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load cublasLt routine
* via operator overloading.
*
* note: default dynamic linked libs
*/
#define DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
inline auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cublasLt_func = \
decltype(::__name(std::declval<Args>()...)) (*)(Args...); \
std::call_once(cublasLt_dso_flag, []() { \
cublasLt_dso_handle = pten::dynload::GetCublasLtDsoHandle(); \
}); \
static void *p_##__name = dlsym(cublasLt_dso_handle, #__name); \
return reinterpret_cast<cublasLt_func>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name
// APIs available after CUDA 10.1
// #if CUDA_VERSION >= 10100
#define CUBLASLT_BLAS_ROUTINE_EACH(__macro) \
__macro(cublasLtCreate); \
__macro(cublasLtDestroy); \
__macro(cublasLtMatmul); \
__macro(cublasLtMatmulDescCreate); \
__macro(cublasLtMatmulDescDestroy); \
__macro(cublasLtMatmulDescSetAttribute); \
__macro(cublasLtMatrixLayoutCreate); \
__macro(cublasLtMatrixLayoutDestroy); \
__macro(cublasLtMatrixLayoutSetAttribute); \
__macro(cublasLtMatrixTransform); \
__macro(cublasLtMatrixTransformDescCreate); \
__macro(cublasLtMatrixTransformDescDestroy); \
__macro(cublasLtMatrixTransformDescSetAttribute);
CUBLASLT_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP)
// #endif
#undef DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP
} // namespace dynload
} // namespace pten
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cuda_driver.h"
namespace pten {
namespace dynload {
std::once_flag cuda_dso_flag;
void* cuda_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
#if CUDA_VERSION >= 10020
CUDA_ROUTINE_EACH_VVM(DEFINE_WRAP);
#endif
CUDA_ROUTINE_EACH(DEFINE_WRAP);
bool HasCUDADriver() {
std::call_once(cuda_dso_flag, []() { cuda_dso_handle = GetCUDADsoHandle(); });
return cuda_dso_handle != nullptr;
}
} // namespace dynload
} // namespace pten
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cuda.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag cuda_dso_flag;
extern void* cuda_dso_handle;
extern bool HasCUDADriver();
#define DECLARE_DYNAMIC_LOAD_CUDA_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cuda_func = decltype(&::__name); \
std::call_once(cuda_dso_flag, []() { \
cuda_dso_handle = pten::dynload::GetCUDADsoHandle(); \
}); \
static void* p_##__name = dlsym(cuda_dso_handle, #__name); \
return reinterpret_cast<cuda_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
/**
* include all needed cuda driver functions
**/
#define CUDA_ROUTINE_EACH(__macro) \
__macro(cuInit); \
__macro(cuDriverGetVersion); \
__macro(cuGetErrorString); \
__macro(cuModuleLoadData); \
__macro(cuModuleGetFunction); \
__macro(cuModuleUnload); \
__macro(cuOccupancyMaxActiveBlocksPerMultiprocessor); \
__macro(cuLaunchKernel); \
__macro(cuCtxCreate); \
__macro(cuCtxGetCurrent); \
__macro(cuDeviceGetCount); \
__macro(cuDevicePrimaryCtxGetState); \
__macro(cuDeviceGetAttribute); \
__macro(cuDeviceGet)
#if CUDA_VERSION >= 10020
#define CUDA_ROUTINE_EACH_VVM(__macro) \
__macro(cuMemGetAllocationGranularity); \
__macro(cuMemAddressReserve); \
__macro(cuMemCreate); \
__macro(cuMemMap); \
__macro(cuMemSetAccess); \
__macro(cuMemUnmap); \
__macro(cuMemRelease); \
__macro(cuMemAddressFree)
CUDA_ROUTINE_EACH_VVM(DECLARE_DYNAMIC_LOAD_CUDA_WRAP);
#endif
CUDA_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDA_WRAP);
#undef DECLARE_DYNAMIC_LOAD_CUDA_WRAP
} // namespace dynload
} // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cudnn.h"
#include "paddle/fluid/platform/enforce.h"
namespace pten {
namespace dynload {
std::once_flag cudnn_dso_flag;
void* cudnn_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CUDNN_DNN_ROUTINE_EACH(DEFINE_WRAP);
#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8
CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(DEFINE_WRAP);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_R7
CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7
CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(DEFINE_WRAP);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R7
CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DEFINE_WRAP);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_R8
CUDNN_DNN_ROUTINE_EACH_R8(DEFINE_WRAP);
#endif
bool HasCUDNN() {
std::call_once(cudnn_dso_flag,
[]() { cudnn_dso_handle = GetCUDNNDsoHandle(); });
return cudnn_dso_handle != nullptr;
}
void EnforceCUDNNLoaded(const char* fn_name) {
PADDLE_ENFORCE_NOT_NULL(
cudnn_dso_handle,
paddle::platform::errors::PreconditionNotMet(
"Cannot load cudnn shared library. Cannot invoke method %s.",
fn_name));
}
} // namespace dynload
} // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_CUDA
#include <cudnn.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag cudnn_dso_flag;
extern void* cudnn_dso_handle;
extern bool HasCUDNN();
extern void EnforceCUDNNLoaded(const char* fn_name);
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cudnn_func = decltype(&::__name); \
std::call_once(cudnn_dso_flag, []() { \
cudnn_dso_handle = pten::dynload::GetCUDNNDsoHandle(); \
}); \
EnforceCUDNNLoaded(#__name); \
static void* p_##__name = dlsym(cudnn_dso_handle, #__name); \
return reinterpret_cast<cudnn_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
/**
* include all needed cudnn functions in HPPL
* different cudnn version has different interfaces
**/
#define CUDNN_DNN_ROUTINE_EACH(__macro) \
__macro(cudnnSetTensor4dDescriptor); \
__macro(cudnnSetTensor4dDescriptorEx); \
__macro(cudnnSetTensorNdDescriptor); \
__macro(cudnnGetTensorNdDescriptor); \
__macro(cudnnGetConvolutionNdForwardOutputDim); \
__macro(cudnnCreateTensorDescriptor); \
__macro(cudnnDestroyTensorDescriptor); \
__macro(cudnnCreateFilterDescriptor); \
__macro(cudnnSetFilter4dDescriptor); \
__macro(cudnnSetFilterNdDescriptor); \
__macro(cudnnGetFilterNdDescriptor); \
__macro(cudnnSetPooling2dDescriptor); \
__macro(cudnnSetPoolingNdDescriptor); \
__macro(cudnnGetPoolingNdDescriptor); \
__macro(cudnnDestroyFilterDescriptor); \
__macro(cudnnCreateConvolutionDescriptor); \
__macro(cudnnCreatePoolingDescriptor); \
__macro(cudnnDestroyPoolingDescriptor); \
__macro(cudnnSetConvolution2dDescriptor); \
__macro(cudnnDestroyConvolutionDescriptor); \
__macro(cudnnSetConvolutionNdDescriptor); \
__macro(cudnnGetConvolutionNdDescriptor); \
__macro(cudnnDeriveBNTensorDescriptor); \
__macro(cudnnCreateSpatialTransformerDescriptor); \
__macro(cudnnSetSpatialTransformerNdDescriptor); \
__macro(cudnnDestroySpatialTransformerDescriptor); \
__macro(cudnnSpatialTfGridGeneratorForward); \
__macro(cudnnSpatialTfGridGeneratorBackward); \
__macro(cudnnSpatialTfSamplerForward); \
__macro(cudnnSpatialTfSamplerBackward); \
__macro(cudnnCreate); \
__macro(cudnnDestroy); \
__macro(cudnnSetStream); \
__macro(cudnnActivationForward); \
__macro(cudnnActivationBackward); \
__macro(cudnnConvolutionForward); \
__macro(cudnnConvolutionBackwardBias); \
__macro(cudnnGetConvolutionForwardWorkspaceSize); \
__macro(cudnnTransformTensor); \
__macro(cudnnPoolingForward); \
__macro(cudnnPoolingBackward); \
__macro(cudnnSoftmaxBackward); \
__macro(cudnnSoftmaxForward); \
__macro(cudnnGetVersion); \
__macro(cudnnFindConvolutionForwardAlgorithmEx); \
__macro(cudnnFindConvolutionBackwardFilterAlgorithmEx); \
__macro(cudnnFindConvolutionBackwardFilterAlgorithm); \
__macro(cudnnFindConvolutionBackwardDataAlgorithmEx); \
__macro(cudnnGetErrorString); \
__macro(cudnnCreateDropoutDescriptor); \
__macro(cudnnDropoutGetStatesSize); \
__macro(cudnnSetDropoutDescriptor); \
__macro(cudnnRestoreDropoutDescriptor); \
__macro(cudnnCreateRNNDescriptor); \
__macro(cudnnGetRNNParamsSize); \
__macro(cudnnGetRNNWorkspaceSize); \
__macro(cudnnGetRNNTrainingReserveSize); \
__macro(cudnnRNNForwardTraining); \
__macro(cudnnRNNBackwardData); \
__macro(cudnnRNNBackwardWeights); \
__macro(cudnnRNNForwardInference); \
__macro(cudnnDestroyDropoutDescriptor); \
__macro(cudnnDestroyRNNDescriptor); \
__macro(cudnnSetTensorNdDescriptorEx); \
__macro(cudnnAddTensor); \
__macro(cudnnConvolutionBackwardData); \
__macro(cudnnConvolutionBackwardFilter); \
__macro(cudnnGetConvolutionBackwardFilterWorkspaceSize); \
__macro(cudnnGetConvolutionBackwardDataWorkspaceSize); \
__macro(cudnnBatchNormalizationForwardTraining); \
__macro(cudnnBatchNormalizationForwardInference); \
__macro(cudnnBatchNormalizationBackward); \
__macro(cudnnCreateActivationDescriptor); \
__macro(cudnnSetActivationDescriptor); \
__macro(cudnnGetActivationDescriptor); \
__macro(cudnnDestroyActivationDescriptor); \
__macro(cudnnSetRNNDescriptor_v6);
CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#if CUDNN_VERSION >= 7000 && CUDNN_VERSION < 8000
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(__macro) \
__macro(cudnnGetConvolutionBackwardFilterAlgorithm); \
__macro(cudnnGetConvolutionForwardAlgorithm); \
__macro(cudnnGetConvolutionBackwardDataAlgorithm); \
__macro(cudnnSetRNNDescriptor);
CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif
#if CUDNN_VERSION >= 7001
#define CUDNN_DNN_ROUTINE_EACH_R7(__macro) \
__macro(cudnnSetConvolutionGroupCount); \
__macro(cudnnSetConvolutionMathType); \
__macro(cudnnConvolutionBiasActivationForward); \
__macro(cudnnCreateCTCLossDescriptor); \
__macro(cudnnDestroyCTCLossDescriptor); \
__macro(cudnnGetCTCLossDescriptor); \
__macro(cudnnSetCTCLossDescriptor); \
__macro(cudnnGetCTCLossWorkspaceSize); \
__macro(cudnnCTCLoss); \
__macro(cudnnGetConvolutionBackwardDataAlgorithm_v7); \
__macro(cudnnGetConvolutionBackwardFilterAlgorithm_v7); \
__macro(cudnnGetConvolutionForwardAlgorithm_v7); \
__macro(cudnnGetConvolutionBackwardFilterAlgorithmMaxCount);
CUDNN_DNN_ROUTINE_EACH_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif
#if CUDNN_VERSION >= 7201
#define CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(__macro) \
__macro(cudnnCreateRNNDataDescriptor); \
__macro(cudnnDestroyRNNDataDescriptor); \
__macro(cudnnSetRNNDataDescriptor); \
__macro(cudnnSetRNNPaddingMode); \
__macro(cudnnRNNForwardTrainingEx); \
__macro(cudnnRNNBackwardDataEx); \
__macro(cudnnRNNBackwardWeightsEx); \
__macro(cudnnRNNForwardInferenceEx);
CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif
#if CUDNN_VERSION >= 7401
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R7(__macro) \
__macro(cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize); \
__macro(cudnnBatchNormalizationForwardTrainingEx); \
__macro(cudnnGetBatchNormalizationBackwardExWorkspaceSize); \
__macro(cudnnBatchNormalizationBackwardEx); \
__macro(cudnnGetBatchNormalizationTrainingExReserveSpaceSize);
CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif
#if CUDNN_VERSION >= 8000
#define CUDNN_DNN_ROUTINE_EACH_R8(__macro) \
__macro(cudnnSetRNNDescriptor_v8); \
__macro(cudnnCreateFusedOpsPlan); \
__macro(cudnnCreateFusedOpsConstParamPack); \
__macro(cudnnCreateFusedOpsVariantParamPack); \
__macro(cudnnDestroyFusedOpsPlan); \
__macro(cudnnDestroyFusedOpsConstParamPack); \
__macro(cudnnDestroyFusedOpsVariantParamPack); \
__macro(cudnnFusedOpsExecute); \
__macro(cudnnSetFusedOpsConstParamPackAttribute); \
__macro(cudnnSetFusedOpsVariantParamPackAttribute); \
__macro(cudnnMakeFusedOpsPlan);
CUDNN_DNN_ROUTINE_EACH_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif
} // namespace dynload
} // namespace pten
#endif
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cufft.h"
#include "paddle/fluid/platform/enforce.h"
namespace pten {
namespace dynload {
std::once_flag cufft_dso_flag;
void* cufft_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CUFFT_FFT_ROUTINE_EACH(DEFINE_WRAP);
bool HasCUFFT() {
std::call_once(cufft_dso_flag,
[]() { cufft_dso_handle = GetCUFFTDsoHandle(); });
return cufft_dso_handle != nullptr;
}
void EnforceCUFFTLoaded(const char* fn_name) {
PADDLE_ENFORCE_NOT_NULL(
cufft_dso_handle,
paddle::platform::errors::PreconditionNotMet(
"Cannot load cufft shared library. Cannot invoke method %s.",
fn_name));
}
} // namespace dynload
} // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_CUDA
#include <cufft.h>
#include <cufftXt.h>
#include <glog/logging.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag cufft_dso_flag;
extern void* cufft_dso_handle;
extern bool HasCUFFT();
extern void EnforceCUFFTLoaded(const char* fn_name);
#define DECLARE_DYNAMIC_LOAD_CUFFT_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cufft_func = decltype(&::__name); \
std::call_once(cufft_dso_flag, []() { \
cufft_dso_handle = pten::dynload::GetCUFFTDsoHandle(); \
}); \
EnforceCUFFTLoaded(#__name); \
static void* p_##__name = dlsym(cufft_dso_handle, #__name); \
return reinterpret_cast<cufft_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
/**
* include all needed cufft functions in HPPL
* different cufft version has different interfaces
**/
#define CUFFT_FFT_ROUTINE_EACH(__macro) \
__macro(cufftPlan1d); \
__macro(cufftPlan2d); \
__macro(cufftPlan3d); \
__macro(cufftPlanMany); \
__macro(cufftMakePlan1d); \
__macro(cufftMakePlan2d); \
__macro(cufftMakePlan3d); \
__macro(cufftMakePlanMany); \
__macro(cufftMakePlanMany64); \
__macro(cufftGetSizeMany64); \
__macro(cufftEstimate1d); \
__macro(cufftEstimate2d); \
__macro(cufftEstimate3d); \
__macro(cufftEstimateMany); \
__macro(cufftCreate); \
__macro(cufftGetSize1d); \
__macro(cufftGetSize2d); \
__macro(cufftGetSize3d); \
__macro(cufftGetSizeMany); \
__macro(cufftGetSize); \
__macro(cufftSetWorkArea); \
__macro(cufftSetAutoAllocation); \
__macro(cufftExecC2C); \
__macro(cufftExecR2C); \
__macro(cufftExecC2R); \
__macro(cufftExecZ2Z); \
__macro(cufftExecD2Z); \
__macro(cufftExecZ2D); \
__macro(cufftSetStream); \
__macro(cufftDestroy); \
__macro(cufftGetVersion); \
__macro(cufftGetProperty); \
__macro(cufftXtSetGPUs); \
__macro(cufftXtMalloc); \
__macro(cufftXtMemcpy); \
__macro(cufftXtFree); \
__macro(cufftXtSetWorkArea); \
__macro(cufftXtExecDescriptorC2C); \
__macro(cufftXtExecDescriptorR2C); \
__macro(cufftXtExecDescriptorC2R); \
__macro(cufftXtExecDescriptorZ2Z); \
__macro(cufftXtExecDescriptorD2Z); \
__macro(cufftXtExecDescriptorZ2D); \
__macro(cufftXtQueryPlan); \
__macro(cufftXtSetCallback); \
__macro(cufftXtClearCallback); \
__macro(cufftXtSetCallbackSharedSize); \
__macro(cufftXtMakePlanMany); \
__macro(cufftXtGetSizeMany); \
__macro(cufftXtExec); \
__macro(cufftXtExecDescriptor); \
__macro(cufftXtSetWorkAreaPolicy);
CUFFT_FFT_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUFFT_WRAP)
} // namespace dynload
} // namespace pten
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_CUPTI
#include "paddle/pten/backends/dynload/cupti.h"
namespace pten {
namespace dynload {
std::once_flag cupti_dso_flag;
void *cupti_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CUPTI_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace pten
#endif // PADDLE_WITH_CUPTI
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_CUPTI
#include <cuda.h>
#include <cupti.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag cupti_dso_flag;
extern void *cupti_dso_handle;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load cupti routine
* via operator overloading.
*
* note: default dynamic linked libs
*/
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
inline CUptiResult CUPTIAPI operator()(Args... args) { \
using cuptiFunc = decltype(&::__name); \
std::call_once(cupti_dso_flag, []() { \
cupti_dso_handle = pten::dynload::GetCUPTIDsoHandle(); \
}); \
static void *p_##__name = dlsym(cupti_dso_handle, #__name); \
return reinterpret_cast<cuptiFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name
#define CUPTI_ROUTINE_EACH(__macro) \
__macro(cuptiActivityEnable); \
__macro(cuptiActivityDisable); \
__macro(cuptiActivityRegisterCallbacks); \
__macro(cuptiActivityGetAttribute); \
__macro(cuptiActivitySetAttribute); \
__macro(cuptiGetTimestamp); \
__macro(cuptiActivityGetNextRecord); \
__macro(cuptiGetResultString); \
__macro(cuptiActivityGetNumDroppedRecords); \
__macro(cuptiActivityFlushAll); \
__macro(cuptiSubscribe); \
__macro(cuptiUnsubscribe); \
__macro(cuptiEnableCallback); \
__macro(cuptiEnableDomain);
CUPTI_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUPTI_WRAP);
#undef DECLARE_DYNAMIC_LOAD_CUPTI_WRAP
} // namespace dynload
} // namespace pten
#endif // PADDLE_WITH_CUPTI
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#define CUPTI_LIB_PATH "@CUPTI_LIBRARY_PATH@"
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/curand.h"
namespace pten {
namespace dynload {
std::once_flag curand_dso_flag;
void *curand_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CURAND_RAND_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <curand.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag curand_dso_flag;
extern void *curand_dso_handle;
#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
curandStatus_t operator()(Args... args) { \
using curandFunc = decltype(&::__name); \
std::call_once(curand_dso_flag, []() { \
curand_dso_handle = pten::dynload::GetCurandDsoHandle(); \
}); \
static void *p_##__name = dlsym(curand_dso_handle, #__name); \
return reinterpret_cast<curandFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name
#define CURAND_RAND_ROUTINE_EACH(__macro) \
__macro(curandCreateGenerator); \
__macro(curandSetStream); \
__macro(curandSetPseudoRandomGeneratorSeed); \
__macro(curandGenerateUniform); \
__macro(curandGenerateUniformDouble); \
__macro(curandGenerateNormal); \
__macro(curandDestroyGenerator);
CURAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP);
} // namespace dynload
} // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cusolver.h"
namespace pten {
namespace dynload {
std::once_flag cusolver_dso_flag;
void *cusolver_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CUSOLVER_ROUTINE_EACH(DEFINE_WRAP);
#ifdef CUSOLVER_ROUTINE_EACH_R1
CUSOLVER_ROUTINE_EACH_R1(DEFINE_WRAP);
#endif
#ifdef CUSOLVER_ROUTINE_EACH_R2
CUSOLVER_ROUTINE_EACH_R2(DEFINE_WRAP);
#endif
} // namespace dynload
} // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cuda.h>
#include <cusolverDn.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag cusolver_dso_flag;
extern void *cusolver_dso_handle;
#define DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
cusolverStatus_t operator()(Args... args) { \
using cusolverFunc = decltype(&::__name); \
std::call_once(cusolver_dso_flag, []() { \
cusolver_dso_handle = pten::dynload::GetCusolverDsoHandle(); \
}); \
static void *p_##__name = dlsym(cusolver_dso_handle, #__name); \
return reinterpret_cast<cusolverFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name
#define CUSOLVER_ROUTINE_EACH(__macro) \
__macro(cusolverDnCreate); \
__macro(cusolverDnDestroy); \
__macro(cusolverDnSetStream); \
__macro(cusolverDnSpotrf_bufferSize); \
__macro(cusolverDnDpotrf_bufferSize); \
__macro(cusolverDnSpotrf); \
__macro(cusolverDnDpotrf); \
__macro(cusolverDnSpotrs); \
__macro(cusolverDnDpotrs); \
__macro(cusolverDnCpotrs); \
__macro(cusolverDnZpotrs); \
__macro(cusolverDnSsyevd_bufferSize); \
__macro(cusolverDnDsyevd_bufferSize); \
__macro(cusolverDnCheevd_bufferSize); \
__macro(cusolverDnZheevd_bufferSize); \
__macro(cusolverDnSsyevd); \
__macro(cusolverDnDsyevd); \
__macro(cusolverDnCheevd); \
__macro(cusolverDnZheevd);
CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
#if CUDA_VERSION >= 9020
#define CUSOLVER_ROUTINE_EACH_R1(__macro) \
__macro(cusolverDnSpotrfBatched); \
__macro(cusolverDnDpotrfBatched); \
__macro(cusolverDnSpotrsBatched); \
__macro(cusolverDnDpotrsBatched); \
__macro(cusolverDnSgesvdj_bufferSize); \
__macro(cusolverDnSgetrf_bufferSize); \
__macro(cusolverDnDgetrf_bufferSize); \
__macro(cusolverDnCgetrf_bufferSize); \
__macro(cusolverDnZgetrf_bufferSize); \
__macro(cusolverDnSgeqrf_bufferSize); \
__macro(cusolverDnDgeqrf_bufferSize); \
__macro(cusolverDnCgeqrf_bufferSize); \
__macro(cusolverDnZgeqrf_bufferSize); \
__macro(cusolverDnSorgqr_bufferSize); \
__macro(cusolverDnDorgqr_bufferSize); \
__macro(cusolverDnSormqr_bufferSize); \
__macro(cusolverDnDormqr_bufferSize); \
__macro(cusolverDnCungqr_bufferSize); \
__macro(cusolverDnZungqr_bufferSize); \
__macro(cusolverDnDestroyGesvdjInfo); \
__macro(cusolverDnCreateGesvdjInfo); \
__macro(cusolverDnDgesvdj_bufferSize); \
__macro(cusolverDnSgesvdj); \
__macro(cusolverDnDgesvdj); \
__macro(cusolverDnSgetrf); \
__macro(cusolverDnDgetrf); \
__macro(cusolverDnCgetrf); \
__macro(cusolverDnZgetrf); \
__macro(cusolverDnSgeqrf); \
__macro(cusolverDnDgeqrf); \
__macro(cusolverDnCgeqrf); \
__macro(cusolverDnZgeqrf); \
__macro(cusolverDnSorgqr); \
__macro(cusolverDnDorgqr); \
__macro(cusolverDnSormqr); \
__macro(cusolverDnDormqr); \
__macro(cusolverDnCungqr); \
__macro(cusolverDnZungqr);
CUSOLVER_ROUTINE_EACH_R1(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
#endif
#if CUDA_VERSION >= 9020
#define CUSOLVER_ROUTINE_EACH_R2(__macro) \
__macro(cusolverDnCreateSyevjInfo); \
__macro(cusolverDnSsyevj_bufferSize); \
__macro(cusolverDnDsyevj_bufferSize); \
__macro(cusolverDnSsyevj); \
__macro(cusolverDnDsyevj); \
__macro(cusolverDnDestroySyevjInfo);
CUSOLVER_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
#endif
#undef DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP
} // namespace dynload
} // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cusparse.h"
namespace pten {
namespace dynload {
std::once_flag cusparse_dso_flag;
void *cusparse_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
#ifdef CUSPARSE_ROUTINE_EACH
CUSPARSE_ROUTINE_EACH(DEFINE_WRAP);
#endif
#ifdef CUBLAS_BLAS_ROUTINE_EACH_R2
CUSPARSE_ROUTINE_EACH_R2(DEFINE_WRAP);
#endif
#ifdef CUSPARSE_ROUTINE_EACH_11020
CUSPARSE_ROUTINE_EACH_11020(DEFINE_WRAP);
#endif
} // namespace dynload
} // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cuda.h>
#include <cusparse.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag cusparse_dso_flag;
extern void *cusparse_dso_handle;
#define DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
cusparseStatus_t operator()(Args... args) { \
using cusparseFunc = decltype(&::__name); \
std::call_once(cusparse_dso_flag, []() { \
cusparse_dso_handle = pten::dynload::GetCusparseDsoHandle(); \
}); \
static void *p_##__name = dlsym(cusparse_dso_handle, #__name); \
return reinterpret_cast<cusparseFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name
#if defined(PADDLE_WITH_CUDA)
// The generic APIs is supported from CUDA10.1
#if CUDA_VERSION >= 10010
#define CUSPARSE_ROUTINE_EACH(__macro) \
__macro(cusparseCreate); \
__macro(cusparseSetStream); \
__macro(cusparseCreateMatDescr); \
__macro(cusparseDestroy); \
__macro(cusparseSnnz); \
__macro(cusparseDnnz); \
__macro(cusparseSetMatType); \
__macro(cusparseSetMatIndexBase);
CUSPARSE_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP);
// APIs available after CUDA 11.2
#if CUDA_VERSION >= 11020
#define CUSPARSE_ROUTINE_EACH_11020(__macro) \
__macro(cusparseCreateCsr); \
__macro(cusparseCreateCoo); \
__macro(cusparseCreateDnMat); \
__macro(cusparseSpMM_bufferSize); \
__macro(cusparseSpMM); \
__macro(cusparseDestroySpMat); \
__macro(cusparseDestroyDnMat); \
__macro(cusparseCooSetPointers); \
__macro(cusparseCsrSetPointers); \
__macro(cusparseDenseToSparse_bufferSize); \
__macro(cusparseDenseToSparse_analysis); \
__macro(cusparseDenseToSparse_convert); \
__macro(cusparseSparseToDense_bufferSize); \
__macro(cusparseSparseToDense);
CUSPARSE_ROUTINE_EACH_11020(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
// APIs available after CUDA 11.3
#if CUDA_VERSION >= 11030
#define CUSPARSE_ROUTINE_EACH_R2(__macro) \
__macro(cusparseSDDMM_bufferSize); \
__macro(cusparseSDDMM_preprocess); \
__macro(cusparseSDDMM);
CUSPARSE_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
#endif
#endif
#endif
#endif
#undef DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP
} // namespace dynload
} // namespace pten
此差异已折叠。
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
namespace pten {
namespace dynload {
#ifndef _WIN32
#define DECLARE_TYPE(__name, ...) decltype(__name(__VA_ARGS__))
#else
#define DECLARE_TYPE(__name, ...) decltype(auto)
#endif
void* GetCublasDsoHandle();
void* GetCublasLtDsoHandle();
void* GetCUDNNDsoHandle();
void* GetCUPTIDsoHandle();
void* GetCurandDsoHandle();
void* GetNvjpegDsoHandle();
void* GetCusolverDsoHandle();
void* GetCusparseDsoHandle();
void* GetNVRTCDsoHandle();
void* GetCUDADsoHandle();
void* GetWarpCTCDsoHandle();
void* GetNCCLDsoHandle();
void* GetHCCLDsoHandle();
void* GetTensorRtDsoHandle();
void* GetMKLMLDsoHandle();
void* GetLAPACKDsoHandle();
void* GetOpDsoHandle(const std::string& dso_name);
void* GetNvtxDsoHandle();
void* GetCUFFTDsoHandle();
void* GetMKLRTDsoHandle();
void* GetROCFFTDsoHandle();
void SetPaddleLibPath(const std::string&);
} // namespace dynload
} // namespace pten
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/hipfft.h"
namespace pten {
namespace dynload {
std::once_flag hipfft_dso_flag;
void *hipfft_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
HIPFFT_FFT_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace pten
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_HIP
#include <hipfft.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag hipfft_dso_flag;
extern void *hipfft_dso_handle;
#define DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using hipfftFunc = decltype(&::__name); \
std::call_once(hipfft_dso_flag, []() { \
hipfft_dso_handle = pten::dynload::GetROCFFTDsoHandle(); \
}); \
static void *p_##__name = dlsym(hipfft_dso_handle, #__name); \
return reinterpret_cast<hipfftFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name
#define HIPFFT_FFT_ROUTINE_EACH(__macro) \
__macro(hipfftPlan1d); \
__macro(hipfftPlan2d); \
__macro(hipfftPlan3d); \
__macro(hipfftPlanMany); \
__macro(hipfftMakePlan1d); \
__macro(hipfftMakePlanMany); \
__macro(hipfftMakePlanMany64); \
__macro(hipfftGetSizeMany64); \
__macro(hipfftEstimate1d); \
__macro(hipfftEstimate2d); \
__macro(hipfftEstimate3d); \
__macro(hipfftEstimateMany); \
__macro(hipfftCreate); \
__macro(hipfftGetSize1d); \
__macro(hipfftGetSizeMany); \
__macro(hipfftGetSize); \
__macro(hipfftSetWorkArea); \
__macro(hipfftSetAutoAllocation); \
__macro(hipfftExecC2C); \
__macro(hipfftExecR2C); \
__macro(hipfftExecC2R); \
__macro(hipfftExecZ2Z); \
__macro(hipfftExecD2Z); \
__macro(hipfftExecZ2D); \
__macro(hipfftSetStream); \
__macro(hipfftDestroy); \
__macro(hipfftGetVersion); \
__macro(hipfftGetProperty);
HIPFFT_FFT_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP);
inline const char *hipfftGetErrorString(hipfftResult_t status) {
switch (status) {
case HIPFFT_SUCCESS:
return "'HIPFFT_SUCCESS'. The hipFFT operation was successful.";
case HIPFFT_INVALID_PLAN:
return "'HIPFFT_INVALID_PLAN'. hipFFT was passed an invalid plan handle.";
case HIPFFT_ALLOC_FAILED:
return "'HIPFFT_ALLOC_FAILED'. hipFFT failed to allocate GPU or CPU "
"memory.";
case HIPFFT_INVALID_TYPE:
return "'HIPFFT_INVALID_TYPE'. No longer used.";
case HIPFFT_INVALID_VALUE:
return "'HIPFFT_INVALID_VALUE'. User specified an invalid pointer or "
"parameter.";
case HIPFFT_INTERNAL_ERROR:
return "'HIPFFT_INTERNAL_ERROR'. Driver or internal hipFFT library "
"error.";
case HIPFFT_EXEC_FAILED:
return "'HIPFFT_EXEC_FAILED'. Failed to execute an FFT on the GPU.";
case HIPFFT_SETUP_FAILED:
return "'HIPFFT_SETUP_FAILED'. The hipFFT library failed to initialize.";
case HIPFFT_INVALID_SIZE:
return "'HIPFFT_INVALID_SIZE'. User specified an invalid transform size.";
case HIPFFT_UNALIGNED_DATA:
return "'HIPFFT_UNALIGNED_DATA'. No longer used.";
case HIPFFT_INCOMPLETE_PARAMETER_LIST:
return "'HIPFFT_INCOMPLETE_PARAMETER_LIST'. Missing parameters in call.";
case HIPFFT_INVALID_DEVICE:
return "'HIPFFT_INVALID_DEVICE'. Execution of a plan was on different "
"GPU than plan creation.";
case HIPFFT_PARSE_ERROR:
return "'HIPFFT_PARSE_ERROR'. Internal plan database error.";
case HIPFFT_NO_WORKSPACE:
return "'HIPFFT_NO_WORKSPACE'. No workspace has been provided prior to "
"plan execution.";
case HIPFFT_NOT_IMPLEMENTED:
return "'HIPFFT_NOT_IMPLEMENTED'. Function does not implement "
"functionality for parameters given.";
case HIPFFT_NOT_SUPPORTED:
return "'HIPFFT_NOT_SUPPORTED'. Operation is not supported for "
"parameters given.";
default:
return "HIPFFT_STATUS_UNKNOWN_ERROR";
}
}
} // namespace dynload
} // namespace pten
#endif
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/hiprand.h"
namespace pten {
namespace dynload {
std::once_flag hiprand_dso_flag;
void *hiprand_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
HIPRAND_RAND_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace pten
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <hiprand.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/port.h"
#include "paddle/pten/backends/dynload/dynamic_loader.h"
namespace pten {
namespace dynload {
extern std::once_flag hiprand_dso_flag;
extern void *hiprand_dso_handle;
#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
hiprandStatus_t operator()(Args... args) { \
using hiprandFunc = decltype(&::__name); \
std::call_once(hiprand_dso_flag, []() { \
hiprand_dso_handle = pten::dynload::GetCurandDsoHandle(); \
}); \
static void *p_##__name = dlsym(hiprand_dso_handle, #__name); \
return reinterpret_cast<hiprandFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name
#define HIPRAND_RAND_ROUTINE_EACH(__macro) \
__macro(hiprandCreateGenerator); \
__macro(hiprandSetStream); \
__macro(hiprandSetPseudoRandomGeneratorSeed); \
__macro(hiprandGenerateUniform); \
__macro(hiprandGenerateUniformDouble); \
__macro(hiprandGenerateNormal); \
__macro(hiprandDestroyGenerator);
HIPRAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP);
} // namespace dynload
} // namespace pten
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/hiprtc.h"
namespace pten {
namespace dynload {
std::once_flag hiprtc_dso_flag;
void* hiprtc_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
HIPRTC_ROUTINE_EACH(DEFINE_WRAP);
bool HasNVRTC() {
std::call_once(hiprtc_dso_flag,
[]() { hiprtc_dso_handle = GetNVRTCDsoHandle(); });
return hiprtc_dso_handle != nullptr;
}
} // namespace dynload
} // namespace pten
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <hip/hiprtc.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag hiprtc_dso_flag;
extern void* hiprtc_dso_handle;
extern bool HasNVRTC();
#define DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using hiprtc_func = decltype(&::__name); \
std::call_once(hiprtc_dso_flag, []() { \
hiprtc_dso_handle = pten::dynload::GetNVRTCDsoHandle(); \
}); \
static void* p_##__name = dlsym(hiprtc_dso_handle, #__name); \
return reinterpret_cast<hiprtc_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
/**
* include all needed hiprtc functions
**/
#define HIPRTC_ROUTINE_EACH(__macro) \
__macro(hiprtcVersion); \
__macro(hiprtcGetErrorString); \
__macro(hiprtcCompileProgram); \
__macro(hiprtcCreateProgram); \
__macro(hiprtcDestroyProgram); \
__macro(hiprtcGetCode); \
__macro(hiprtcGetCodeSize); \
__macro(hiprtcGetProgramLog); \
__macro(hiprtcGetProgramLogSize)
HIPRTC_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP);
#undef DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP
} // namespace dynload
} // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/lapack.h"
#include <mutex>
namespace pten {
namespace dynload {
std::once_flag lapack_dso_flag;
void* lapack_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
LAPACK_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace pten
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册