未验证 提交 3c1dc6f6 编写于 作者: W Wilber 提交者: GitHub

[PTEN] Move dynload from fluid to pten. (#39120)

* move dynload from fluid to pten.

* fix ci compile

* fix windows ci compile.

* update

* update

* fix compile error
上级 11938ae1
...@@ -27,7 +27,7 @@ limitations under the License. */ ...@@ -27,7 +27,7 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/port.h" #include "paddle/pten/backends/dynload/port.h"
namespace butil { namespace butil {
class IOBuf; class IOBuf;
...@@ -78,11 +78,11 @@ void DeserializeFromMultiVarMsgAndIOBuf(const MultiVarMsg& multi_msg, ...@@ -78,11 +78,11 @@ void DeserializeFromMultiVarMsgAndIOBuf(const MultiVarMsg& multi_msg,
const framework::Scope* scope); const framework::Scope* scope);
void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg, void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg,
butil::IOBufBytesIterator& iobuf, butil::IOBufBytesIterator& iobuf, // NOLINT
const platform::DeviceContext& ctx); const platform::DeviceContext& ctx);
void DeserializeSelectedRows(framework::Variable* var, const VarMsg& msg, void DeserializeSelectedRows(framework::Variable* var, const VarMsg& msg,
butil::IOBufBytesIterator& iobuf, butil::IOBufBytesIterator& iobuf, // NOLINT
const platform::DeviceContext& ctx); const platform::DeviceContext& ctx);
std::string GetIntTypeEndpoint(const std::string& ip, const uint32_t& port); std::string GetIntTypeEndpoint(const std::string& ip, const uint32_t& port);
......
...@@ -40,9 +40,9 @@ ...@@ -40,9 +40,9 @@
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/printf.h" #include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/string_helper.h" #include "paddle/fluid/string/string_helper.h"
#include "paddle/pten/backends/dynload/port.h"
namespace paddle { namespace paddle {
namespace distributed { namespace distributed {
...@@ -202,7 +202,7 @@ class ValueBlock { ...@@ -202,7 +202,7 @@ class ValueBlock {
// value = _alloc.acquire(value_length_); // value = _alloc.acquire(value_length_);
table[id] = value; table[id] = value;
} else { } else {
value = (VALUE *)(void *)(res->second); value = (VALUE *)(void *)(res->second); // NOLINT
} }
return value; return value;
} }
...@@ -282,8 +282,8 @@ class ValueBlock { ...@@ -282,8 +282,8 @@ class ValueBlock {
value->unseen_days_++; value->unseen_days_++;
if (value->unseen_days_ >= threshold) { if (value->unseen_days_ >= threshold) {
butil::return_object(iter->second); butil::return_object(iter->second);
//_alloc.release(iter->second); // _alloc.release(iter->second);
//_alloc.release(value); // _alloc.release(value);
iter = table.erase(iter); iter = table.erase(iter);
} else { } else {
++iter; ++iter;
......
...@@ -38,8 +38,8 @@ limitations under the License. */ ...@@ -38,8 +38,8 @@ limitations under the License. */
#include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/operators/reader/blocking_queue.h" #include "paddle/fluid/operators/reader/blocking_queue.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/timer.h" #include "paddle/fluid/platform/timer.h"
#include "paddle/pten/backends/dynload/port.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "paddle/fluid/framework/io/shell.h" #include "paddle/fluid/framework/io/shell.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
......
...@@ -34,8 +34,8 @@ ...@@ -34,8 +34,8 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/string_helper.h" #include "paddle/fluid/string/string_helper.h"
#include "paddle/pten/backends/dynload/port.h"
#if defined(__arm__) || defined(__aarch64__) || defined(__ARM_NEON) || \ #if defined(__arm__) || defined(__aarch64__) || defined(__ARM_NEON) || \
defined(__ARM_NEON__) defined(__ARM_NEON__)
......
...@@ -34,7 +34,7 @@ limitations under the License. */ ...@@ -34,7 +34,7 @@ limitations under the License. */
#include "paddle/fluid/framework/trainer_desc.pb.h" #include "paddle/fluid/framework/trainer_desc.pb.h"
#include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/operators/reader/blocking_queue.h" #include "paddle/fluid/operators/reader/blocking_queue.h"
#include "paddle/fluid/platform/port.h" #include "paddle/pten/backends/dynload/port.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include "paddle/fluid/inference/analysis/ut_helper.h" #include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h" #include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/platform/port.h" #include "paddle/pten/backends/dynload/port.h"
namespace paddle { namespace paddle {
namespace inference { namespace inference {
......
...@@ -28,7 +28,7 @@ limitations under the License. */ ...@@ -28,7 +28,7 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/port.h" #include "paddle/pten/backends/dynload/port.h"
#ifdef _WIN32 #ifdef _WIN32
#include <direct.h> #include <direct.h>
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include <vector> #include <vector>
#include "paddle/fluid/inference/analysis/analysis_pass.h" #include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/platform/port.h" #include "paddle/pten/backends/dynload/port.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -31,8 +31,8 @@ ...@@ -31,8 +31,8 @@
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/printf.h" #include "paddle/fluid/string/printf.h"
#include "paddle/pten/backends/dynload/port.h"
extern std::string paddle::framework::DataTypeToString( extern std::string paddle::framework::DataTypeToString(
const framework::proto::VarType::Type type); const framework::proto::VarType::Type type);
......
...@@ -22,8 +22,8 @@ limitations under the License. */ ...@@ -22,8 +22,8 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/io.h" #include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
#include "paddle/pten/backends/dynload/port.h"
DECLARE_bool(use_mkldnn); DECLARE_bool(use_mkldnn);
......
...@@ -23,7 +23,7 @@ limitations under the License. */ ...@@ -23,7 +23,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/common_infer_shape_functions.h" #include "paddle/fluid/operators/common_infer_shape_functions.h"
#include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h" #include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h"
#include "paddle/fluid/platform/port.h" #include "paddle/pten/backends/dynload/port.h"
DECLARE_bool(use_mkldnn); DECLARE_bool(use_mkldnn);
......
...@@ -27,7 +27,7 @@ limitations under the License. */ ...@@ -27,7 +27,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/string_array.h" #include "paddle/fluid/framework/string_array.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/port.h" #include "paddle/pten/backends/dynload/port.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -20,7 +20,7 @@ limitations under the License. */ ...@@ -20,7 +20,7 @@ limitations under the License. */
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h" #include "paddle/pten/backends/dynload/port.h"
#define HCOM_GROUP_PREFIX "HCOM_GROUP_" #define HCOM_GROUP_PREFIX "HCOM_GROUP_"
......
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce) cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce pten_dynamic_loader)
list(APPEND CUDA_SRCS cublas.cc cublasLt.cc cudnn.cc curand.cc cusolver.cc cusparse.cc nvtx.cc cufft.cc) list(APPEND CUDA_SRCS cublas.cc cublasLt.cc cudnn.cc curand.cc cusolver.cc cusparse.cc nvtx.cc cufft.cc)
...@@ -34,24 +34,24 @@ if (CUPTI_FOUND) ...@@ -34,24 +34,24 @@ if (CUPTI_FOUND)
list(APPEND CUDA_SRCS cupti.cc) list(APPEND CUDA_SRCS cupti.cc)
endif(CUPTI_FOUND) endif(CUPTI_FOUND)
if(WITH_ROCM) if(WITH_ROCM)
hip_library(dynload_cuda SRCS ${HIP_SRCS} DEPS dynamic_loader) hip_library(dynload_cuda SRCS ${HIP_SRCS} DEPS dynamic_loader pten_dynload_cuda)
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc) cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc pten_dynload_warpctc)
elseif (WITH_ASCEND_CL) elseif (WITH_ASCEND_CL)
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc npu_hccl) cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc npu_hccl pten_dynload_warpctc)
else() else()
nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader) nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader pten_dynload_cuda)
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc) cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc pten_dynload_warpctc)
endif() endif()
if (WITH_MKLML) if (WITH_MKLML)
cc_library(dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml) cc_library(dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml pten_dynload_mklml)
endif() endif()
cc_library(dynload_lapack SRCS lapack.cc DEPS dynamic_loader) cc_library(dynload_lapack SRCS lapack.cc DEPS dynamic_loader pten_dynload_lapack)
add_dependencies(dynload_lapack extern_lapack) add_dependencies(dynload_lapack extern_lapack)
# TODO(TJ): add iomp, mkldnn? # TODO(TJ): add iomp, mkldnn?
if (MKL_FOUND AND WITH_ONEMKL) if (MKL_FOUND AND WITH_ONEMKL)
message("ONEMKL INCLUDE directory is ${MKL_INCLUDE}") message("ONEMKL INCLUDE directory is ${MKL_INCLUDE}")
cc_library(dynload_mklrt SRCS mklrt.cc DEPS dynamic_loader) cc_library(dynload_mklrt SRCS mklrt.cc DEPS dynamic_loader pten_dynload_mklrt)
target_include_directories(dynload_mklrt PRIVATE ${MKL_INCLUDE}) target_include_directories(dynload_mklrt PRIVATE ${MKL_INCLUDE})
endif() endif()
...@@ -17,8 +17,6 @@ limitations under the License. */ ...@@ -17,8 +17,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag cublas_dso_flag;
void *cublas_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
......
...@@ -20,16 +20,12 @@ limitations under the License. */ ...@@ -20,16 +20,12 @@ limitations under the License. */
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include <type_traits> #include <type_traits>
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/cublas.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag cublas_dso_flag;
extern void *cublas_dso_handle;
/** /**
* The following macro definition can generate structs * The following macro definition can generate structs
* (for each function) to dynamic load cublas routine * (for each function) to dynamic load cublas routine
...@@ -37,19 +33,8 @@ extern void *cublas_dso_handle; ...@@ -37,19 +33,8 @@ extern void *cublas_dso_handle;
* *
* note: default dynamic linked libs * note: default dynamic linked libs
*/ */
#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ #define PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \
inline auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cublas_func = \
decltype(::__name(std::declval<Args>()...)) (*)(Args...); \
std::call_once(cublas_dso_flag, []() { \
cublas_dso_handle = paddle::platform::dynload::GetCublasDsoHandle(); \
}); \
static void *p_##__name = dlsym(cublas_dso_handle, #__name); \
return reinterpret_cast<cublas_func>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \ #define CUBLAS_BLAS_ROUTINE_EACH(__macro) \
...@@ -99,7 +84,7 @@ extern void *cublas_dso_handle; ...@@ -99,7 +84,7 @@ extern void *cublas_dso_handle;
__macro(cublasSgetrsBatched); \ __macro(cublasSgetrsBatched); \
__macro(cublasDgetrsBatched); __macro(cublasDgetrsBatched);
CUBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP) CUBLAS_BLAS_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
// APIs available after CUDA 8.0 // APIs available after CUDA 8.0
#if CUDA_VERSION >= 8000 #if CUDA_VERSION >= 8000
...@@ -111,7 +96,7 @@ CUBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP) ...@@ -111,7 +96,7 @@ CUBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
__macro(cublasZgemmStridedBatched); \ __macro(cublasZgemmStridedBatched); \
__macro(cublasHgemmStridedBatched); __macro(cublasHgemmStridedBatched);
CUBLAS_BLAS_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP) CUBLAS_BLAS_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
#endif #endif
// APIs available after CUDA 9.0 // APIs available after CUDA 9.0
...@@ -120,7 +105,7 @@ CUBLAS_BLAS_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP) ...@@ -120,7 +105,7 @@ CUBLAS_BLAS_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
__macro(cublasSetMathMode); \ __macro(cublasSetMathMode); \
__macro(cublasGetMathMode); __macro(cublasGetMathMode);
CUBLAS_BLAS_ROUTINE_EACH_R3(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP) CUBLAS_BLAS_ROUTINE_EACH_R3(PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
#endif #endif
// APIs available after CUDA 9.1 // APIs available after CUDA 9.1
...@@ -129,10 +114,10 @@ CUBLAS_BLAS_ROUTINE_EACH_R3(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP) ...@@ -129,10 +114,10 @@ CUBLAS_BLAS_ROUTINE_EACH_R3(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
__macro(cublasGemmBatchedEx); \ __macro(cublasGemmBatchedEx); \
__macro(cublasGemmStridedBatchedEx); __macro(cublasGemmStridedBatchedEx);
CUBLAS_BLAS_ROUTINE_EACH_R4(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP) CUBLAS_BLAS_ROUTINE_EACH_R4(PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
#endif #endif
#undef DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP #undef PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -17,8 +17,6 @@ limitations under the License. */ ...@@ -17,8 +17,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag cublasLt_dso_flag;
void *cublasLt_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
......
...@@ -19,16 +19,12 @@ limitations under the License. */ ...@@ -19,16 +19,12 @@ limitations under the License. */
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include <type_traits> #include <type_traits>
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/cublasLt.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag cublasLt_dso_flag;
extern void *cublasLt_dso_handle;
/** /**
* The following macro definition can generate structs * The following macro definition can generate structs
* (for each function) to dynamic load cublasLt routine * (for each function) to dynamic load cublasLt routine
...@@ -36,20 +32,8 @@ extern void *cublasLt_dso_handle; ...@@ -36,20 +32,8 @@ extern void *cublasLt_dso_handle;
* *
* note: default dynamic linked libs * note: default dynamic linked libs
*/ */
#define DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP(__name) \ #define PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \
inline auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cublasLt_func = \
decltype(::__name(std::declval<Args>()...)) (*)(Args...); \
std::call_once(cublasLt_dso_flag, []() { \
cublasLt_dso_handle = \
paddle::platform::dynload::GetCublasLtDsoHandle(); \
}); \
static void *p_##__name = dlsym(cublasLt_dso_handle, #__name); \
return reinterpret_cast<cublasLt_func>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
// APIs available after CUDA 10.1 // APIs available after CUDA 10.1
...@@ -69,10 +53,10 @@ extern void *cublasLt_dso_handle; ...@@ -69,10 +53,10 @@ extern void *cublasLt_dso_handle;
__macro(cublasLtMatrixTransformDescDestroy); \ __macro(cublasLtMatrixTransformDescDestroy); \
__macro(cublasLtMatrixTransformDescSetAttribute); __macro(cublasLtMatrixTransformDescSetAttribute);
CUBLASLT_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP) CUBLASLT_BLAS_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP)
// #endif // #endif
#undef DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP #undef PLATFORM_DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -13,14 +13,12 @@ See the License for the specific language governing permissions and ...@@ -13,14 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/dynload/cuda_driver.h" #include "paddle/fluid/platform/dynload/cuda_driver.h"
#include "paddle/pten/backends/dynload/cuda_driver.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag cuda_dso_flag;
void* cuda_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
#if CUDA_VERSION >= 10020 #if CUDA_VERSION >= 10020
...@@ -28,10 +26,7 @@ CUDA_ROUTINE_EACH_VVM(DEFINE_WRAP); ...@@ -28,10 +26,7 @@ CUDA_ROUTINE_EACH_VVM(DEFINE_WRAP);
#endif #endif
CUDA_ROUTINE_EACH(DEFINE_WRAP); CUDA_ROUTINE_EACH(DEFINE_WRAP);
bool HasCUDADriver() { bool HasCUDADriver() { return pten::dynload::HasCUDADriver(); }
std::call_once(cuda_dso_flag, []() { cuda_dso_handle = GetCUDADsoHandle(); });
return cuda_dso_handle != nullptr;
}
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -17,30 +17,17 @@ limitations under the License. */ ...@@ -17,30 +17,17 @@ limitations under the License. */
#include <cuda.h> #include <cuda.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/cuda_driver.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag cuda_dso_flag;
extern void* cuda_dso_handle;
extern bool HasCUDADriver(); extern bool HasCUDADriver();
#define DECLARE_DYNAMIC_LOAD_CUDA_WRAP(__name) \ #define PLATFORM_DECLARE_DYNAMIC_LOAD_CUDA_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \ extern DynLoad__##__name __name
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cuda_func = decltype(&::__name); \
std::call_once(cuda_dso_flag, []() { \
cuda_dso_handle = paddle::platform::dynload::GetCUDADsoHandle(); \
}); \
static void* p_##__name = dlsym(cuda_dso_handle, #__name); \
return reinterpret_cast<cuda_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
/** /**
* include all needed cuda driver functions * include all needed cuda driver functions
...@@ -72,12 +59,12 @@ extern bool HasCUDADriver(); ...@@ -72,12 +59,12 @@ extern bool HasCUDADriver();
__macro(cuMemRelease); \ __macro(cuMemRelease); \
__macro(cuMemAddressFree) __macro(cuMemAddressFree)
CUDA_ROUTINE_EACH_VVM(DECLARE_DYNAMIC_LOAD_CUDA_WRAP); CUDA_ROUTINE_EACH_VVM(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDA_WRAP);
#endif #endif
CUDA_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDA_WRAP); CUDA_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDA_WRAP);
#undef DECLARE_DYNAMIC_LOAD_CUDA_WRAP #undef PLATFORM_DECLARE_DYNAMIC_LOAD_CUDA_WRAP
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -13,13 +13,11 @@ See the License for the specific language governing permissions and ...@@ -13,13 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/dynload/cudnn.h" #include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/pten/backends/dynload/cudnn.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag cudnn_dso_flag;
void* cudnn_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
...@@ -45,19 +43,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DEFINE_WRAP); ...@@ -45,19 +43,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DEFINE_WRAP);
CUDNN_DNN_ROUTINE_EACH_R8(DEFINE_WRAP); CUDNN_DNN_ROUTINE_EACH_R8(DEFINE_WRAP);
#endif #endif
bool HasCUDNN() { bool HasCUDNN() { return pten::dynload::HasCUDNN(); }
std::call_once(cudnn_dso_flag,
[]() { cudnn_dso_handle = GetCUDNNDsoHandle(); });
return cudnn_dso_handle != nullptr;
}
void EnforceCUDNNLoaded(const char* fn_name) {
PADDLE_ENFORCE_NOT_NULL(
cudnn_dso_handle,
platform::errors::PreconditionNotMet(
"Cannot load cudnn shared library. Cannot invoke method %s.",
fn_name));
}
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -18,32 +18,17 @@ limitations under the License. */ ...@@ -18,32 +18,17 @@ limitations under the License. */
#include <glog/logging.h> #include <glog/logging.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/cudnn.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag cudnn_dso_flag;
extern void* cudnn_dso_handle;
extern bool HasCUDNN(); extern bool HasCUDNN();
extern void EnforceCUDNNLoaded(const char* fn_name); #define PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
struct DynLoad__##__name { \ extern DynLoad__##__name __name
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cudnn_func = decltype(&::__name); \
std::call_once(cudnn_dso_flag, []() { \
cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \
}); \
EnforceCUDNNLoaded(#__name); \
static void* p_##__name = dlsym(cudnn_dso_handle, #__name); \
return reinterpret_cast<cudnn_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
/** /**
* include all needed cudnn functions in HPPL * include all needed cudnn functions in HPPL
...@@ -127,7 +112,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name); ...@@ -127,7 +112,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
__macro(cudnnGetActivationDescriptor); \ __macro(cudnnGetActivationDescriptor); \
__macro(cudnnDestroyActivationDescriptor); \ __macro(cudnnDestroyActivationDescriptor); \
__macro(cudnnSetRNNDescriptor_v6); __macro(cudnnSetRNNDescriptor_v6);
CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#if CUDNN_VERSION >= 7000 && CUDNN_VERSION < 8000 #if CUDNN_VERSION >= 7000 && CUDNN_VERSION < 8000
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(__macro) \ #define CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(__macro) \
...@@ -135,7 +120,8 @@ CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) ...@@ -135,7 +120,8 @@ CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
__macro(cudnnGetConvolutionForwardAlgorithm); \ __macro(cudnnGetConvolutionForwardAlgorithm); \
__macro(cudnnGetConvolutionBackwardDataAlgorithm); \ __macro(cudnnGetConvolutionBackwardDataAlgorithm); \
__macro(cudnnSetRNNDescriptor); __macro(cudnnSetRNNDescriptor);
CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(
PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif #endif
#if CUDNN_VERSION >= 7001 #if CUDNN_VERSION >= 7001
...@@ -153,7 +139,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) ...@@ -153,7 +139,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
__macro(cudnnGetConvolutionBackwardFilterAlgorithm_v7); \ __macro(cudnnGetConvolutionBackwardFilterAlgorithm_v7); \
__macro(cudnnGetConvolutionForwardAlgorithm_v7); \ __macro(cudnnGetConvolutionForwardAlgorithm_v7); \
__macro(cudnnGetConvolutionBackwardFilterAlgorithmMaxCount); __macro(cudnnGetConvolutionBackwardFilterAlgorithmMaxCount);
CUDNN_DNN_ROUTINE_EACH_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH_R7(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif #endif
#if CUDNN_VERSION >= 7201 #if CUDNN_VERSION >= 7201
...@@ -166,7 +152,7 @@ CUDNN_DNN_ROUTINE_EACH_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) ...@@ -166,7 +152,7 @@ CUDNN_DNN_ROUTINE_EACH_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
__macro(cudnnRNNBackwardDataEx); \ __macro(cudnnRNNBackwardDataEx); \
__macro(cudnnRNNBackwardWeightsEx); \ __macro(cudnnRNNBackwardWeightsEx); \
__macro(cudnnRNNForwardInferenceEx); __macro(cudnnRNNForwardInferenceEx);
CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif #endif
#if CUDNN_VERSION >= 7401 #if CUDNN_VERSION >= 7401
...@@ -176,7 +162,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) ...@@ -176,7 +162,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
__macro(cudnnGetBatchNormalizationBackwardExWorkspaceSize); \ __macro(cudnnGetBatchNormalizationBackwardExWorkspaceSize); \
__macro(cudnnBatchNormalizationBackwardEx); \ __macro(cudnnBatchNormalizationBackwardEx); \
__macro(cudnnGetBatchNormalizationTrainingExReserveSpaceSize); __macro(cudnnGetBatchNormalizationTrainingExReserveSpaceSize);
CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH_AFTER_R7(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif #endif
#if CUDNN_VERSION >= 8000 #if CUDNN_VERSION >= 8000
...@@ -192,7 +178,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) ...@@ -192,7 +178,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
__macro(cudnnSetFusedOpsConstParamPackAttribute); \ __macro(cudnnSetFusedOpsConstParamPackAttribute); \
__macro(cudnnSetFusedOpsVariantParamPackAttribute); \ __macro(cudnnSetFusedOpsVariantParamPackAttribute); \
__macro(cudnnMakeFusedOpsPlan); __macro(cudnnMakeFusedOpsPlan);
CUDNN_DNN_ROUTINE_EACH_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH_R8(PLATFORM_DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif #endif
} // namespace dynload } // namespace dynload
......
...@@ -13,31 +13,17 @@ See the License for the specific language governing permissions and ...@@ -13,31 +13,17 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/dynload/cufft.h" #include "paddle/fluid/platform/dynload/cufft.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/pten/backends/dynload/cufft.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag cufft_dso_flag;
void* cufft_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
CUFFT_FFT_ROUTINE_EACH(DEFINE_WRAP); CUFFT_FFT_ROUTINE_EACH(DEFINE_WRAP);
bool HasCUFFT() { bool HasCUFFT() { return pten::dynload::HasCUFFT(); }
std::call_once(cufft_dso_flag,
[]() { cufft_dso_handle = GetCUFFTDsoHandle(); });
return cufft_dso_handle != nullptr;
}
void EnforceCUFFTLoaded(const char* fn_name) {
PADDLE_ENFORCE_NOT_NULL(
cufft_dso_handle,
platform::errors::PreconditionNotMet(
"Cannot load cufft shared library. Cannot invoke method %s.",
fn_name));
}
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -19,32 +19,17 @@ limitations under the License. */ ...@@ -19,32 +19,17 @@ limitations under the License. */
#include <glog/logging.h> #include <glog/logging.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/cufft.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag cufft_dso_flag;
extern void* cufft_dso_handle;
extern bool HasCUFFT(); extern bool HasCUFFT();
extern void EnforceCUFFTLoaded(const char* fn_name); #define PLATFORM_DECLARE_DYNAMIC_LOAD_CUFFT_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_CUFFT_WRAP(__name) \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
struct DynLoad__##__name { \ extern DynLoad__##__name __name
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using cufft_func = decltype(&::__name); \
std::call_once(cufft_dso_flag, []() { \
cufft_dso_handle = paddle::platform::dynload::GetCUFFTDsoHandle(); \
}); \
EnforceCUFFTLoaded(#__name); \
static void* p_##__name = dlsym(cufft_dso_handle, #__name); \
return reinterpret_cast<cufft_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
/** /**
* include all needed cufft functions in HPPL * include all needed cufft functions in HPPL
...@@ -104,7 +89,7 @@ extern void EnforceCUFFTLoaded(const char* fn_name); ...@@ -104,7 +89,7 @@ extern void EnforceCUFFTLoaded(const char* fn_name);
__macro(cufftXtExecDescriptor); \ __macro(cufftXtExecDescriptor); \
__macro(cufftXtSetWorkAreaPolicy); __macro(cufftXtSetWorkAreaPolicy);
CUFFT_FFT_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUFFT_WRAP) CUFFT_FFT_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUFFT_WRAP)
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -20,9 +20,6 @@ namespace paddle { ...@@ -20,9 +20,6 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag cupti_dso_flag;
void *cupti_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
CUPTI_ROUTINE_EACH(DEFINE_WRAP); CUPTI_ROUTINE_EACH(DEFINE_WRAP);
......
...@@ -19,16 +19,12 @@ limitations under the License. */ ...@@ -19,16 +19,12 @@ limitations under the License. */
#include <cupti.h> #include <cupti.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/cupti.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag cupti_dso_flag;
extern void *cupti_dso_handle;
/** /**
* The following macro definition can generate structs * The following macro definition can generate structs
* (for each function) to dynamic load cupti routine * (for each function) to dynamic load cupti routine
...@@ -36,18 +32,8 @@ extern void *cupti_dso_handle; ...@@ -36,18 +32,8 @@ extern void *cupti_dso_handle;
* *
* note: default dynamic linked libs * note: default dynamic linked libs
*/ */
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \
inline CUptiResult CUPTIAPI operator()(Args... args) { \
using cuptiFunc = decltype(&::__name); \
std::call_once(cupti_dso_flag, []() { \
cupti_dso_handle = paddle::platform::dynload::GetCUPTIDsoHandle(); \
}); \
static void *p_##__name = dlsym(cupti_dso_handle, #__name); \
return reinterpret_cast<cuptiFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define CUPTI_ROUTINE_EACH(__macro) \ #define CUPTI_ROUTINE_EACH(__macro) \
......
...@@ -18,9 +18,6 @@ namespace paddle { ...@@ -18,9 +18,6 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag curand_dso_flag;
void *curand_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
CURAND_RAND_ROUTINE_EACH(DEFINE_WRAP); CURAND_RAND_ROUTINE_EACH(DEFINE_WRAP);
......
...@@ -16,27 +16,14 @@ limitations under the License. */ ...@@ -16,27 +16,14 @@ limitations under the License. */
#include <curand.h> #include <curand.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/curand.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag curand_dso_flag;
extern void *curand_dso_handle; #define PLATFORM_DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
curandStatus_t operator()(Args... args) { \
using curandFunc = decltype(&::__name); \
std::call_once(curand_dso_flag, []() { \
curand_dso_handle = paddle::platform::dynload::GetCurandDsoHandle(); \
}); \
static void *p_##__name = dlsym(curand_dso_handle, #__name); \
return reinterpret_cast<curandFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define CURAND_RAND_ROUTINE_EACH(__macro) \ #define CURAND_RAND_ROUTINE_EACH(__macro) \
...@@ -48,7 +35,7 @@ extern void *curand_dso_handle; ...@@ -48,7 +35,7 @@ extern void *curand_dso_handle;
__macro(curandGenerateNormal); \ __macro(curandGenerateNormal); \
__macro(curandDestroyGenerator); __macro(curandDestroyGenerator);
CURAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP); CURAND_RAND_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CURAND_WRAP);
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -18,9 +18,6 @@ namespace paddle { ...@@ -18,9 +18,6 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag cusolver_dso_flag;
void *cusolver_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
CUSOLVER_ROUTINE_EACH(DEFINE_WRAP); CUSOLVER_ROUTINE_EACH(DEFINE_WRAP);
......
...@@ -17,28 +17,14 @@ limitations under the License. */ ...@@ -17,28 +17,14 @@ limitations under the License. */
#include <cusolverDn.h> #include <cusolverDn.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/cusolver.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag cusolver_dso_flag;
extern void *cusolver_dso_handle;
#define DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP(__name) \ #define PLATFORM_DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \
cusolverStatus_t operator()(Args... args) { \
using cusolverFunc = decltype(&::__name); \
std::call_once(cusolver_dso_flag, []() { \
cusolver_dso_handle = \
paddle::platform::dynload::GetCusolverDsoHandle(); \
}); \
static void *p_##__name = dlsym(cusolver_dso_handle, #__name); \
return reinterpret_cast<cusolverFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define CUSOLVER_ROUTINE_EACH(__macro) \ #define CUSOLVER_ROUTINE_EACH(__macro) \
...@@ -62,7 +48,7 @@ extern void *cusolver_dso_handle; ...@@ -62,7 +48,7 @@ extern void *cusolver_dso_handle;
__macro(cusolverDnCheevd); \ __macro(cusolverDnCheevd); \
__macro(cusolverDnZheevd); __macro(cusolverDnZheevd);
CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP); CUSOLVER_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
#if CUDA_VERSION >= 9020 #if CUDA_VERSION >= 9020
#define CUSOLVER_ROUTINE_EACH_R1(__macro) \ #define CUSOLVER_ROUTINE_EACH_R1(__macro) \
...@@ -105,7 +91,7 @@ CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP); ...@@ -105,7 +91,7 @@ CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
__macro(cusolverDnCungqr); \ __macro(cusolverDnCungqr); \
__macro(cusolverDnZungqr); __macro(cusolverDnZungqr);
CUSOLVER_ROUTINE_EACH_R1(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP) CUSOLVER_ROUTINE_EACH_R1(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
#endif #endif
#if CUDA_VERSION >= 9020 #if CUDA_VERSION >= 9020
...@@ -117,10 +103,10 @@ CUSOLVER_ROUTINE_EACH_R1(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP) ...@@ -117,10 +103,10 @@ CUSOLVER_ROUTINE_EACH_R1(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
__macro(cusolverDnDsyevj); \ __macro(cusolverDnDsyevj); \
__macro(cusolverDnDestroySyevjInfo); __macro(cusolverDnDestroySyevjInfo);
CUSOLVER_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP) CUSOLVER_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
#endif #endif
#undef DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP #undef PLATFORM_DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -18,9 +18,6 @@ namespace paddle { ...@@ -18,9 +18,6 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag cusparse_dso_flag;
void *cusparse_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
#ifdef CUSPARSE_ROUTINE_EACH #ifdef CUSPARSE_ROUTINE_EACH
......
...@@ -17,28 +17,14 @@ limitations under the License. */ ...@@ -17,28 +17,14 @@ limitations under the License. */
#include <cusparse.h> #include <cusparse.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/cusparse.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag cusparse_dso_flag;
extern void *cusparse_dso_handle;
#define DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP(__name) \ #define PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \
cusparseStatus_t operator()(Args... args) { \
using cusparseFunc = decltype(&::__name); \
std::call_once(cusparse_dso_flag, []() { \
cusparse_dso_handle = \
paddle::platform::dynload::GetCusparseDsoHandle(); \
}); \
static void *p_##__name = dlsym(cusparse_dso_handle, #__name); \
return reinterpret_cast<cusparseFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#if defined(PADDLE_WITH_CUDA) #if defined(PADDLE_WITH_CUDA)
...@@ -54,7 +40,7 @@ extern void *cusparse_dso_handle; ...@@ -54,7 +40,7 @@ extern void *cusparse_dso_handle;
__macro(cusparseSetMatType); \ __macro(cusparseSetMatType); \
__macro(cusparseSetMatIndexBase); __macro(cusparseSetMatIndexBase);
CUSPARSE_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP); CUSPARSE_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP);
// APIs available after CUDA 11.2 // APIs available after CUDA 11.2
#if CUDA_VERSION >= 11020 #if CUDA_VERSION >= 11020
...@@ -74,7 +60,7 @@ CUSPARSE_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP); ...@@ -74,7 +60,7 @@ CUSPARSE_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP);
__macro(cusparseSparseToDense_bufferSize); \ __macro(cusparseSparseToDense_bufferSize); \
__macro(cusparseSparseToDense); __macro(cusparseSparseToDense);
CUSPARSE_ROUTINE_EACH_11020(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP) CUSPARSE_ROUTINE_EACH_11020(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
// APIs available after CUDA 11.3 // APIs available after CUDA 11.3
#if CUDA_VERSION >= 11030 #if CUDA_VERSION >= 11030
...@@ -83,13 +69,13 @@ CUSPARSE_ROUTINE_EACH_11020(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP) ...@@ -83,13 +69,13 @@ CUSPARSE_ROUTINE_EACH_11020(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
__macro(cusparseSDDMM_preprocess); \ __macro(cusparseSDDMM_preprocess); \
__macro(cusparseSDDMM); __macro(cusparseSDDMM);
CUSPARSE_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP) CUSPARSE_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
#endif #endif
#endif #endif
#endif #endif
#endif #endif
#undef DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP #undef PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -18,9 +18,6 @@ namespace paddle { ...@@ -18,9 +18,6 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag hipfft_dso_flag;
void *hipfft_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
HIPFFT_FFT_ROUTINE_EACH(DEFINE_WRAP); HIPFFT_FFT_ROUTINE_EACH(DEFINE_WRAP);
......
...@@ -17,8 +17,7 @@ limitations under the License. */ ...@@ -17,8 +17,7 @@ limitations under the License. */
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/hipfft.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -26,18 +25,8 @@ namespace dynload { ...@@ -26,18 +25,8 @@ namespace dynload {
extern std::once_flag hipfft_dso_flag; extern std::once_flag hipfft_dso_flag;
extern void *hipfft_dso_handle; extern void *hipfft_dso_handle;
#define DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP(__name) \ #define PLATFORM_DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using hipfftFunc = decltype(&::__name); \
std::call_once(hipfft_dso_flag, []() { \
hipfft_dso_handle = paddle::platform::dynload::GetROCFFTDsoHandle(); \
}); \
static void *p_##__name = dlsym(hipfft_dso_handle, #__name); \
return reinterpret_cast<hipfftFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define HIPFFT_FFT_ROUTINE_EACH(__macro) \ #define HIPFFT_FFT_ROUTINE_EACH(__macro) \
...@@ -70,53 +59,8 @@ extern void *hipfft_dso_handle; ...@@ -70,53 +59,8 @@ extern void *hipfft_dso_handle;
__macro(hipfftGetVersion); \ __macro(hipfftGetVersion); \
__macro(hipfftGetProperty); __macro(hipfftGetProperty);
HIPFFT_FFT_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP); HIPFFT_FFT_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP);
inline const char *hipfftGetErrorString(hipfftResult_t status) {
switch (status) {
case HIPFFT_SUCCESS:
return "'HIPFFT_SUCCESS'. The hipFFT operation was successful.";
case HIPFFT_INVALID_PLAN:
return "'HIPFFT_INVALID_PLAN'. hipFFT was passed an invalid plan handle.";
case HIPFFT_ALLOC_FAILED:
return "'HIPFFT_ALLOC_FAILED'. hipFFT failed to allocate GPU or CPU "
"memory.";
case HIPFFT_INVALID_TYPE:
return "'HIPFFT_INVALID_TYPE'. No longer used.";
case HIPFFT_INVALID_VALUE:
return "'HIPFFT_INVALID_VALUE'. User specified an invalid pointer or "
"parameter.";
case HIPFFT_INTERNAL_ERROR:
return "'HIPFFT_INTERNAL_ERROR'. Driver or internal hipFFT library "
"error.";
case HIPFFT_EXEC_FAILED:
return "'HIPFFT_EXEC_FAILED'. Failed to execute an FFT on the GPU.";
case HIPFFT_SETUP_FAILED:
return "'HIPFFT_SETUP_FAILED'. The hipFFT library failed to initialize.";
case HIPFFT_INVALID_SIZE:
return "'HIPFFT_INVALID_SIZE'. User specified an invalid transform size.";
case HIPFFT_UNALIGNED_DATA:
return "'HIPFFT_UNALIGNED_DATA'. No longer used.";
case HIPFFT_INCOMPLETE_PARAMETER_LIST:
return "'HIPFFT_INCOMPLETE_PARAMETER_LIST'. Missing parameters in call.";
case HIPFFT_INVALID_DEVICE:
return "'HIPFFT_INVALID_DEVICE'. Execution of a plan was on different "
"GPU than plan creation.";
case HIPFFT_PARSE_ERROR:
return "'HIPFFT_PARSE_ERROR'. Internal plan database error.";
case HIPFFT_NO_WORKSPACE:
return "'HIPFFT_NO_WORKSPACE'. No workspace has been provided prior to "
"plan execution.";
case HIPFFT_NOT_IMPLEMENTED:
return "'HIPFFT_NOT_IMPLEMENTED'. Function does not implement "
"functionality for parameters given.";
case HIPFFT_NOT_SUPPORTED:
return "'HIPFFT_NOT_SUPPORTED'. Operation is not supported for "
"parameters given.";
default:
return "HIPFFT_STATUS_UNKNOWN_ERROR";
}
}
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
......
...@@ -18,9 +18,6 @@ namespace paddle { ...@@ -18,9 +18,6 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag hiprand_dso_flag;
void *hiprand_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
HIPRAND_RAND_ROUTINE_EACH(DEFINE_WRAP); HIPRAND_RAND_ROUTINE_EACH(DEFINE_WRAP);
......
...@@ -16,28 +16,15 @@ limitations under the License. */ ...@@ -16,28 +16,15 @@ limitations under the License. */
#include <hiprand.h> #include <hiprand.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/hiprand.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag hiprand_dso_flag;
extern void *hiprand_dso_handle; #define PLATFORM_DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
hiprandStatus_t operator()(Args... args) { \
using hiprandFunc = decltype(&::__name); \
std::call_once(hiprand_dso_flag, []() { \
hiprand_dso_handle = paddle::platform::dynload::GetCurandDsoHandle(); \
}); \
static void *p_##__name = dlsym(hiprand_dso_handle, #__name); \
return reinterpret_cast<hiprandFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define HIPRAND_RAND_ROUTINE_EACH(__macro) \ #define HIPRAND_RAND_ROUTINE_EACH(__macro) \
...@@ -49,7 +36,7 @@ extern void *hiprand_dso_handle; ...@@ -49,7 +36,7 @@ extern void *hiprand_dso_handle;
__macro(hiprandGenerateNormal); \ __macro(hiprandGenerateNormal); \
__macro(hiprandDestroyGenerator); __macro(hiprandDestroyGenerator);
HIPRAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP); HIPRAND_RAND_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CURAND_WRAP);
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -13,23 +13,17 @@ See the License for the specific language governing permissions and ...@@ -13,23 +13,17 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/dynload/hiprtc.h" #include "paddle/fluid/platform/dynload/hiprtc.h"
#include "paddle/pten/backends/dynload/hiprtc.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag hiprtc_dso_flag;
void* hiprtc_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
HIPRTC_ROUTINE_EACH(DEFINE_WRAP); HIPRTC_ROUTINE_EACH(DEFINE_WRAP);
bool HasNVRTC() { bool HasNVRTC() { return pten::dynload::HasNVRTC(); }
std::call_once(hiprtc_dso_flag,
[]() { hiprtc_dso_handle = GetNVRTCDsoHandle(); });
return hiprtc_dso_handle != nullptr;
}
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -16,30 +16,17 @@ limitations under the License. */ ...@@ -16,30 +16,17 @@ limitations under the License. */
#include <hip/hiprtc.h> #include <hip/hiprtc.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/hiprtc.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag hiprtc_dso_flag;
extern void* hiprtc_dso_handle;
extern bool HasNVRTC(); extern bool HasNVRTC();
#define DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP(__name) \ #define PLATFORM_DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \ extern DynLoad__##__name __name
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using hiprtc_func = decltype(&::__name); \
std::call_once(hiprtc_dso_flag, []() { \
hiprtc_dso_handle = paddle::platform::dynload::GetNVRTCDsoHandle(); \
}); \
static void* p_##__name = dlsym(hiprtc_dso_handle, #__name); \
return reinterpret_cast<hiprtc_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
/** /**
* include all needed hiprtc functions * include all needed hiprtc functions
...@@ -55,9 +42,9 @@ extern bool HasNVRTC(); ...@@ -55,9 +42,9 @@ extern bool HasNVRTC();
__macro(hiprtcGetProgramLog); \ __macro(hiprtcGetProgramLog); \
__macro(hiprtcGetProgramLogSize) __macro(hiprtcGetProgramLogSize)
HIPRTC_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP); HIPRTC_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP);
#undef DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP #undef PLATFORM_DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -13,15 +13,11 @@ See the License for the specific language governing permissions and ...@@ -13,15 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/dynload/lapack.h" #include "paddle/fluid/platform/dynload/lapack.h"
#include <mutex>
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag lapack_dso_flag;
void* lapack_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
LAPACK_ROUTINE_EACH(DEFINE_WRAP); LAPACK_ROUTINE_EACH(DEFINE_WRAP);
......
...@@ -16,122 +16,20 @@ limitations under the License. */ ...@@ -16,122 +16,20 @@ limitations under the License. */
#include <complex> #include <complex>
#include <mutex> #include <mutex>
#include "paddle/fluid/platform/complex.h" #include "paddle/pten/backends/dynload/lapack.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/common/complex.h"
#include "paddle/fluid/platform/port.h"
// Note(zhouwei): because lapack doesn't provide appropriate header file.
// should expose API statement yourself.
// getrf_(For example)
extern "C" void dgetrf_(int *m, int *n, double *a, int *lda, int *ipiv,
int *info);
extern "C" void sgetrf_(int *m, int *n, float *a, int *lda, int *ipiv,
int *info);
// evd
extern "C" void zheevd_(char *jobz, char *uplo, int *n, std::complex<double> *a,
int *lda, double *w, std::complex<double> *work,
int *lwork, double *rwork, int *lrwork, int *iwork,
int *liwork, int *info);
extern "C" void cheevd_(char *jobz, char *uplo, int *n, std::complex<float> *a,
int *lda, float *w, std::complex<float> *work,
int *lwork, float *rwork, int *lrwork, int *iwork,
int *liwork, int *info);
extern "C" void dsyevd_(char *jobz, char *uplo, int *n, double *a, int *lda,
double *w, double *work, int *lwork, int *iwork,
int *liwork, int *info);
extern "C" void ssyevd_(char *jobz, char *uplo, int *n, float *a, int *lda,
float *w, float *work, int *lwork, int *iwork,
int *liwork, int *info);
// geev
extern "C" void dgeev_(char *jobvl, char *jobvr, int *n, double *a, int *lda,
double *wr, double *wi, double *vl, int *ldvl,
double *vr, int *ldvr, double *work, int *lwork,
int *info);
extern "C" void sgeev_(char *jobvl, char *jobvr, int *n, float *a, int *lda,
float *wr, float *wi, float *vl, int *ldvl, float *vr,
int *ldvr, float *work, int *lwork, int *info);
extern "C" void zgeev_(char *jobvl, char *jobvr, int *n,
std::complex<double> *a, int *lda,
std::complex<double> *w, std::complex<double> *vl,
int *ldvl, std::complex<double> *vr, int *ldvr,
std::complex<double> *work, int *lwork, double *rwork,
int *info);
extern "C" void cgeev_(char *jobvl, char *jobvr, int *n, std::complex<float> *a,
int *lda, std::complex<float> *w,
std::complex<float> *vl, int *ldvl,
std::complex<float> *vr, int *ldvr,
std::complex<float> *work, int *lwork, float *rwork,
int *info);
// gels
extern "C" void dgels_(char *trans, int *m, int *n, int *nrhs, double *a,
int *lda, double *b, int *ldb, double *work, int *lwork,
int *info);
extern "C" void sgels_(char *trans, int *m, int *n, int *nrhs, float *a,
int *lda, float *b, int *ldb, float *work, int *lwork,
int *info);
// gelsd
extern "C" void dgelsd_(int *m, int *n, int *nrhs, double *a, int *lda,
double *b, int *ldb, double *s, double *rcond,
int *rank, double *work, int *lwork, int *iwork,
int *info);
extern "C" void sgelsd_(int *m, int *n, int *nrhs, float *a, int *lda, float *b,
int *ldb, float *s, float *rcond, int *rank,
float *work, int *lwork, int *iwork, int *info);
// gelsy
extern "C" void dgelsy_(int *m, int *n, int *nrhs, double *a, int *lda,
double *b, int *ldb, int *jpvt, double *rcond,
int *rank, double *work, int *lwork, int *info);
extern "C" void sgelsy_(int *m, int *n, int *nrhs, float *a, int *lda, float *b,
int *ldb, int *jpvt, float *rcond, int *rank,
float *work, int *lwork, int *info);
// gelss
extern "C" void dgelss_(int *m, int *n, int *nrhs, double *a, int *lda,
double *b, int *ldb, double *s, double *rcond,
int *rank, double *work, int *lwork, int *info);
extern "C" void sgelss_(int *m, int *n, int *nrhs, float *a, int *lda, float *b,
int *ldb, float *s, float *rcond, int *rank,
float *work, int *lwork, int *info);
extern "C" void zpotrs_(char *uplo, int *n, int *nrhs, std::complex<double> *a,
int *lda, std::complex<double> *b, int *ldb, int *info);
extern "C" void cpotrs_(char *uplo, int *n, int *nrhs, std::complex<float> *a,
int *lda, std::complex<float> *b, int *ldb, int *info);
extern "C" void dpotrs_(char *uplo, int *n, int *nrhs, double *a, int *lda,
double *b, int *ldb, int *info);
extern "C" void spotrs_(char *uplo, int *n, int *nrhs, float *a, int *lda,
float *b, int *ldb, int *info);
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag lapack_dso_flag;
extern void *lapack_dso_handle;
/** /**
* The following macro definition can generate structs * The following macro definition can generate structs
* (for each function) to dynamic load lapack routine * (for each function) to dynamic load lapack routine
* via operator overloading. * via operator overloading.
*/ */
#define DYNAMIC_LOAD_LAPACK_WRAP(__name) \ #define DYNAMIC_LOAD_LAPACK_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using lapackFunc = decltype(&::__name); \
std::call_once(lapack_dso_flag, []() { \
lapack_dso_handle = paddle::platform::dynload::GetLAPACKDsoHandle(); \
}); \
static void *p_##_name = dlsym(lapack_dso_handle, #__name); \
return reinterpret_cast<lapackFunc>(p_##_name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define DECLARE_DYNAMIC_LOAD_LAPACK_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_LAPACK_WRAP(__name) \
......
...@@ -13,13 +13,11 @@ See the License for the specific language governing permissions and ...@@ -13,13 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/dynload/miopen.h" #include "paddle/fluid/platform/dynload/miopen.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/pten/backends/dynload/cudnn.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag miopen_dso_flag;
void* miopen_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
...@@ -50,19 +48,7 @@ MIOPEN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP); ...@@ -50,19 +48,7 @@ MIOPEN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP);
MIOPEN_DNN_ROUTINE_EACH_AFTER_R7(DEFINE_WRAP); MIOPEN_DNN_ROUTINE_EACH_AFTER_R7(DEFINE_WRAP);
#endif #endif
bool HasCUDNN() { bool HasCUDNN() { return pten::dynload::HasCUDNN(); }
std::call_once(miopen_dso_flag,
[]() { miopen_dso_handle = GetCUDNNDsoHandle(); });
return miopen_dso_handle != nullptr;
}
void EnforceCUDNNLoaded(const char* fn_name) {
PADDLE_ENFORCE_NOT_NULL(
miopen_dso_handle,
platform::errors::PreconditionNotMet(
"Cannot load miopen shared library. Cannot invoke method %s.",
fn_name));
}
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -18,66 +18,17 @@ limitations under the License. */ ...@@ -18,66 +18,17 @@ limitations under the License. */
#include <miopen/miopen.h> #include <miopen/miopen.h>
#include <miopen/version.h> #include <miopen/version.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/miopen.h"
#include "paddle/fluid/platform/port.h"
#define MIOPEN_VERSION \
(MIOPEN_VERSION_MAJOR * 1000 + MIOPEN_VERSION_MINOR * 10 + \
MIOPEN_VERSION_PATCH) // NOLINT
// MIOPEN only support NCHW, just for compatibility with CUDNN API
typedef enum {
MIOPEN_TENSOR_NCHW = 0,
MIOPEN_TENSOR_NHWC = 1,
} miopenTensorFormat_t;
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag miopen_dso_flag;
extern void* miopen_dso_handle;
extern bool HasCUDNN(); extern bool HasCUDNN();
inline const char* miopenGetErrorString(miopenStatus_t status) { #define PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP(__name) \
switch (status) { using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
case miopenStatusSuccess: extern DynLoad__##__name __name
return "MIOPEN_STATUS_SUCCESS";
case miopenStatusNotInitialized:
return "MIOPEN_STATUS_NOT_INITIALIZED";
case miopenStatusInvalidValue:
return "MIOPEN_STATUS_INVALID_VALUE";
case miopenStatusBadParm:
return "MIOPEN_STATUS_BAD_PARAM";
case miopenStatusAllocFailed:
return "MIOPEN_STATUS_ALLOC_FAILED";
case miopenStatusInternalError:
return "MIOPEN_STATUS_INTERNAL_ERROR";
case miopenStatusNotImplemented:
return "MIOPEN_STATUS_NOT_IMPLEMENTED";
case miopenStatusUnsupportedOp:
return "MIOPEN_STATUS_UNSUPPORTED_OP";
case miopenStatusUnknownError:
default:
return "MIOPEN_STATUS_UNKNOWN_ERROR";
}
}
extern void EnforceCUDNNLoaded(const char* fn_name);
#define DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using miopen_func = decltype(&::__name); \
std::call_once(miopen_dso_flag, []() { \
miopen_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \
}); \
EnforceCUDNNLoaded(#__name); \
static void* p_##__name = dlsym(miopen_dso_handle, #__name); \
return reinterpret_cast<miopen_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
/** /**
* include all needed miopen functions in HPPL * include all needed miopen functions in HPPL
...@@ -145,23 +96,23 @@ extern void EnforceCUDNNLoaded(const char* fn_name); ...@@ -145,23 +96,23 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
__macro(miopenRNNForwardInference); \ __macro(miopenRNNForwardInference); \
__macro(miopenGetTensorNumBytes); __macro(miopenGetTensorNumBytes);
MIOPEN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP) MIOPEN_DNN_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
#define MIOPEN_DNN_ROUTINE_EACH_R2(__macro) \ #define MIOPEN_DNN_ROUTINE_EACH_R2(__macro) \
__macro(miopenConvolutionBackwardData); __macro(miopenConvolutionBackwardData);
MIOPEN_DNN_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP) MIOPEN_DNN_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
// APIs available after R3: // APIs available after R3:
#define MIOPEN_DNN_ROUTINE_EACH_AFTER_R3(__macro) \ #define MIOPEN_DNN_ROUTINE_EACH_AFTER_R3(__macro) \
__macro(miopenConvolutionBackwardWeightsGetWorkSpaceSize); __macro(miopenConvolutionBackwardWeightsGetWorkSpaceSize);
MIOPEN_DNN_ROUTINE_EACH_AFTER_R3(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP) MIOPEN_DNN_ROUTINE_EACH_AFTER_R3(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
// APIs available after R4: // APIs available after R4:
#define MIOPEN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \ #define MIOPEN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \
__macro(miopenBatchNormalizationForwardTraining); \ __macro(miopenBatchNormalizationForwardTraining); \
__macro(miopenBatchNormalizationForwardInference); \ __macro(miopenBatchNormalizationForwardInference); \
__macro(miopenBatchNormalizationBackward); __macro(miopenBatchNormalizationBackward);
MIOPEN_DNN_ROUTINE_EACH_AFTER_R4(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP) MIOPEN_DNN_ROUTINE_EACH_AFTER_R4(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
// APIs in R5 // APIs in R5
#define MIOPEN_DNN_ROUTINE_EACH_R5(__macro) \ #define MIOPEN_DNN_ROUTINE_EACH_R5(__macro) \
...@@ -169,12 +120,12 @@ MIOPEN_DNN_ROUTINE_EACH_AFTER_R4(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP) ...@@ -169,12 +120,12 @@ MIOPEN_DNN_ROUTINE_EACH_AFTER_R4(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
__macro(miopenSetActivationDescriptor); \ __macro(miopenSetActivationDescriptor); \
__macro(miopenGetActivationDescriptor); \ __macro(miopenGetActivationDescriptor); \
__macro(miopenDestroyActivationDescriptor); __macro(miopenDestroyActivationDescriptor);
MIOPEN_DNN_ROUTINE_EACH_R5(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP) MIOPEN_DNN_ROUTINE_EACH_R5(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
// APIs in R6 // APIs in R6
#define MIOPEN_DNN_ROUTINE_EACH_R6(__macro) \ #define MIOPEN_DNN_ROUTINE_EACH_R6(__macro) \
/*__macro(miopenSetRNNDescriptor_v6);*/ /*__macro(miopenSetRNNDescriptor_v6);*/
MIOPEN_DNN_ROUTINE_EACH_R6(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP) MIOPEN_DNN_ROUTINE_EACH_R6(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
#define MIOPEN_DNN_ROUTINE_EACH_R7(__macro) \ #define MIOPEN_DNN_ROUTINE_EACH_R7(__macro) \
__macro(miopenSetConvolutionGroupCount); \ __macro(miopenSetConvolutionGroupCount); \
...@@ -184,7 +135,7 @@ MIOPEN_DNN_ROUTINE_EACH_R6(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP) ...@@ -184,7 +135,7 @@ MIOPEN_DNN_ROUTINE_EACH_R6(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
__macro(miopenSetCTCLossDescriptor); \ __macro(miopenSetCTCLossDescriptor); \
__macro(miopenGetCTCLossWorkspaceSize); \ __macro(miopenGetCTCLossWorkspaceSize); \
__macro(miopenCTCLoss); __macro(miopenCTCLoss);
MIOPEN_DNN_ROUTINE_EACH_R7(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP) MIOPEN_DNN_ROUTINE_EACH_R7(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
#define MIOPEN_DNN_ROUTINE_EACH_AFTER_R7(__macro) \ #define MIOPEN_DNN_ROUTINE_EACH_AFTER_R7(__macro) \
/*__macro(cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize); \ /*__macro(cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize); \
...@@ -192,7 +143,7 @@ __macro(cudnnBatchNormalizationForwardTrainingEx); \ ...@@ -192,7 +143,7 @@ __macro(cudnnBatchNormalizationForwardTrainingEx); \
__macro(cudnnGetBatchNormalizationBackwardExWorkspaceSize); \ __macro(cudnnGetBatchNormalizationBackwardExWorkspaceSize); \
__macro(cudnnBatchNormalizationBackwardEx); \ __macro(cudnnBatchNormalizationBackwardEx); \
__macro(cudnnGetBatchNormalizationTrainingExReserveSpaceSize);*/ __macro(cudnnGetBatchNormalizationTrainingExReserveSpaceSize);*/
MIOPEN_DNN_ROUTINE_EACH_AFTER_R7(DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP) MIOPEN_DNN_ROUTINE_EACH_AFTER_R7(PLATFORM_DECLARE_DYNAMIC_LOAD_MIOPEN_WRAP)
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -18,9 +18,6 @@ namespace paddle { ...@@ -18,9 +18,6 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag mklml_dso_flag;
void* mklml_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
MKLML_ROUTINE_EACH(DEFINE_WRAP); MKLML_ROUTINE_EACH(DEFINE_WRAP);
......
...@@ -17,36 +17,23 @@ limitations under the License. */ ...@@ -17,36 +17,23 @@ limitations under the License. */
#include <mkl.h> #include <mkl.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/mklml.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag mklml_dso_flag;
extern void *mklml_dso_handle;
/** /**
* The following macro definition can generate structs * The following macro definition can generate structs
* (for each function) to dynamic load mklml routine * (for each function) to dynamic load mklml routine
* via operator overloading. * via operator overloading.
*/ */
#define DYNAMIC_LOAD_MKLML_WRAP(__name) \ #define DYNAMIC_LOAD_MKLML_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using mklmlFunc = decltype(&::__name); \
std::call_once(mklml_dso_flag, []() { \
mklml_dso_handle = paddle::platform::dynload::GetMKLMLDsoHandle(); \
}); \
static void *p_##_name = dlsym(mklml_dso_handle, #__name); \
return reinterpret_cast<mklmlFunc>(p_##_name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define DECLARE_DYNAMIC_LOAD_MKLML_WRAP(__name) DYNAMIC_LOAD_MKLML_WRAP(__name) #define PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP(__name) \
DYNAMIC_LOAD_MKLML_WRAP(__name)
#define MKLML_ROUTINE_EACH(__macro) \ #define MKLML_ROUTINE_EACH(__macro) \
__macro(cblas_sgemm); \ __macro(cblas_sgemm); \
...@@ -111,7 +98,7 @@ extern void *mklml_dso_handle; ...@@ -111,7 +98,7 @@ extern void *mklml_dso_handle;
__macro(MKL_Set_Num_Threads); \ __macro(MKL_Set_Num_Threads); \
__macro(MKL_Get_Max_Threads); __macro(MKL_Get_Max_Threads);
MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP); MKLML_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
#if !defined(_WIN32) #if !defined(_WIN32)
DYNAMIC_LOAD_MKLML_WRAP(mkl_scsrmm); DYNAMIC_LOAD_MKLML_WRAP(mkl_scsrmm);
......
...@@ -18,7 +18,7 @@ limitations under the License. */ ...@@ -18,7 +18,7 @@ limitations under the License. */
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h" #include "paddle/pten/backends/dynload/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -32,18 +32,8 @@ extern void* mklrt_dso_handle; ...@@ -32,18 +32,8 @@ extern void* mklrt_dso_handle;
* (for each function) to dynamic load mkldfti routine * (for each function) to dynamic load mkldfti routine
* via operator overloading. * via operator overloading.
*/ */
#define DYNAMIC_LOAD_MKLRT_WRAP(__name) \ #define DYNAMIC_LOAD_MKLRT_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using mklrtFunc = decltype(&::__name); \
std::call_once(mklrt_dso_flag, []() { \
mklrt_dso_handle = paddle::platform::dynload::GetMKLRTDsoHandle(); \
}); \
static void* p_##__name = dlsym(mklrt_dso_handle, #__name); \
return reinterpret_cast<mklrtFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
// mkl_dfti.h has a macro that shadows the function with the same name // mkl_dfti.h has a macro that shadows the function with the same name
......
...@@ -18,9 +18,6 @@ namespace paddle { ...@@ -18,9 +18,6 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag nccl_dso_flag;
void *nccl_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP); NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP);
......
...@@ -16,28 +16,14 @@ limitations under the License. */ ...@@ -16,28 +16,14 @@ limitations under the License. */
#include <nccl.h> #include <nccl.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/nccl.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag nccl_dso_flag; #define PLATFORM_DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
extern void* nccl_dso_handle; using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> decltype(__name(args...)) { \
using nccl_func = decltype(&::__name); \
std::call_once(nccl_dso_flag, []() { \
nccl_dso_handle = paddle::platform::dynload::GetNCCLDsoHandle(); \
}); \
static void* p_##__name = dlsym(nccl_dso_handle, #__name); \
return reinterpret_cast<nccl_func>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define NCCL_RAND_ROUTINE_EACH(__macro) \ #define NCCL_RAND_ROUTINE_EACH(__macro) \
...@@ -57,30 +43,30 @@ extern void* nccl_dso_handle; ...@@ -57,30 +43,30 @@ extern void* nccl_dso_handle;
__macro(ncclReduceScatter); \ __macro(ncclReduceScatter); \
__macro(ncclGetErrorString); __macro(ncclGetErrorString);
NCCL_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NCCL_WRAP) NCCL_RAND_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
#if NCCL_VERSION_CODE >= 2212 #if NCCL_VERSION_CODE >= 2212
#define NCCL_RAND_ROUTINE_EACH_AFTER_2212(__macro) __macro(ncclBroadcast); #define NCCL_RAND_ROUTINE_EACH_AFTER_2212(__macro) __macro(ncclBroadcast);
NCCL_RAND_ROUTINE_EACH_AFTER_2212(DECLARE_DYNAMIC_LOAD_NCCL_WRAP) NCCL_RAND_ROUTINE_EACH_AFTER_2212(PLATFORM_DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
#endif #endif
#if NCCL_VERSION_CODE >= 2304 #if NCCL_VERSION_CODE >= 2304
#define NCCL_RAND_ROUTINE_EACH_AFTER_2304(__macro) __macro(ncclGetVersion); #define NCCL_RAND_ROUTINE_EACH_AFTER_2304(__macro) __macro(ncclGetVersion);
NCCL_RAND_ROUTINE_EACH_AFTER_2304(DECLARE_DYNAMIC_LOAD_NCCL_WRAP) NCCL_RAND_ROUTINE_EACH_AFTER_2304(PLATFORM_DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
#endif #endif
#if NCCL_VERSION_CODE >= 2703 #if NCCL_VERSION_CODE >= 2703
#define NCCL_RAND_ROUTINE_EACH_AFTER_2703(__macro) \ #define NCCL_RAND_ROUTINE_EACH_AFTER_2703(__macro) \
__macro(ncclSend); \ __macro(ncclSend); \
__macro(ncclRecv); __macro(ncclRecv);
NCCL_RAND_ROUTINE_EACH_AFTER_2703(DECLARE_DYNAMIC_LOAD_NCCL_WRAP) NCCL_RAND_ROUTINE_EACH_AFTER_2703(PLATFORM_DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
#endif #endif
#if NCCL_VERSION_CODE >= 21100 #if NCCL_VERSION_CODE >= 21100
#define NCCL_RAND_ROUTINE_EACH_AFTER_21100(__macro) \ #define NCCL_RAND_ROUTINE_EACH_AFTER_21100(__macro) \
__macro(ncclRedOpCreatePreMulSum); \ __macro(ncclRedOpCreatePreMulSum); \
__macro(ncclRedOpDestroy); __macro(ncclRedOpDestroy);
NCCL_RAND_ROUTINE_EACH_AFTER_21100(DECLARE_DYNAMIC_LOAD_NCCL_WRAP) NCCL_RAND_ROUTINE_EACH_AFTER_21100(PLATFORM_DECLARE_DYNAMIC_LOAD_NCCL_WRAP)
#endif #endif
} // namespace dynload } // namespace dynload
......
...@@ -15,9 +15,6 @@ namespace paddle { ...@@ -15,9 +15,6 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag nvjpeg_dso_flag;
void *nvjpeg_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
NVJPEG_RAND_ROUTINE_EACH(DEFINE_WRAP); NVJPEG_RAND_ROUTINE_EACH(DEFINE_WRAP);
......
...@@ -14,27 +14,14 @@ limitations under the License. */ ...@@ -14,27 +14,14 @@ limitations under the License. */
#include <nvjpeg.h> #include <nvjpeg.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/nvjpeg.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag nvjpeg_dso_flag;
extern void *nvjpeg_dso_handle; #define PLATFORM_DECLARE_DYNAMIC_LOAD_NVJPEG_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
#define DECLARE_DYNAMIC_LOAD_NVJPEG_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
nvjpegStatus_t operator()(Args... args) { \
using nvjpegFunc = decltype(&::__name); \
std::call_once(nvjpeg_dso_flag, []() { \
nvjpeg_dso_handle = paddle::platform::dynload::GetNvjpegDsoHandle(); \
}); \
static void *p_##__name = dlsym(nvjpeg_dso_handle, #__name); \
return reinterpret_cast<nvjpegFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define NVJPEG_RAND_ROUTINE_EACH(__macro) \ #define NVJPEG_RAND_ROUTINE_EACH(__macro) \
...@@ -44,7 +31,7 @@ extern void *nvjpeg_dso_handle; ...@@ -44,7 +31,7 @@ extern void *nvjpeg_dso_handle;
__macro(nvjpegJpegStateDestroy); \ __macro(nvjpegJpegStateDestroy); \
__macro(nvjpegDecode); __macro(nvjpegDecode);
NVJPEG_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NVJPEG_WRAP); NVJPEG_RAND_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_NVJPEG_WRAP);
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -13,23 +13,17 @@ See the License for the specific language governing permissions and ...@@ -13,23 +13,17 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/dynload/nvrtc.h" #include "paddle/fluid/platform/dynload/nvrtc.h"
#include "paddle/pten/backends/dynload/nvrtc.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag nvrtc_dso_flag;
void* nvrtc_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
NVRTC_ROUTINE_EACH(DEFINE_WRAP); NVRTC_ROUTINE_EACH(DEFINE_WRAP);
bool HasNVRTC() { bool HasNVRTC() { return pten::dynload::HasNVRTC(); }
std::call_once(nvrtc_dso_flag,
[]() { nvrtc_dso_handle = GetNVRTCDsoHandle(); });
return nvrtc_dso_handle != nullptr;
}
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -17,30 +17,17 @@ limitations under the License. */ ...@@ -17,30 +17,17 @@ limitations under the License. */
#include <nvrtc.h> #include <nvrtc.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/nvrtc.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag nvrtc_dso_flag;
extern void* nvrtc_dso_handle;
extern bool HasNVRTC(); extern bool HasNVRTC();
#define DECLARE_DYNAMIC_LOAD_NVRTC_WRAP(__name) \ #define PLATFORM_DECLARE_DYNAMIC_LOAD_NVRTC_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \ extern DynLoad__##__name __name
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using nvrtc_func = decltype(&::__name); \
std::call_once(nvrtc_dso_flag, []() { \
nvrtc_dso_handle = paddle::platform::dynload::GetNVRTCDsoHandle(); \
}); \
static void* p_##__name = dlsym(nvrtc_dso_handle, #__name); \
return reinterpret_cast<nvrtc_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
/** /**
* include all needed nvrtc functions * include all needed nvrtc functions
...@@ -56,9 +43,9 @@ extern bool HasNVRTC(); ...@@ -56,9 +43,9 @@ extern bool HasNVRTC();
__macro(nvrtcGetProgramLog); \ __macro(nvrtcGetProgramLog); \
__macro(nvrtcGetProgramLogSize) __macro(nvrtcGetProgramLogSize)
NVRTC_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NVRTC_WRAP); NVRTC_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_NVRTC_WRAP);
#undef DECLARE_DYNAMIC_LOAD_NVRTC_WRAP #undef PLATFORM_DECLARE_DYNAMIC_LOAD_NVRTC_WRAP
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -18,9 +18,6 @@ namespace paddle { ...@@ -18,9 +18,6 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag nvtx_dso_flag;
void *nvtx_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
NVTX_ROUTINE_EACH(DEFINE_WRAP); NVTX_ROUTINE_EACH(DEFINE_WRAP);
......
...@@ -17,36 +17,23 @@ limitations under the License. */ ...@@ -17,36 +17,23 @@ limitations under the License. */
#include <nvToolsExt.h> #include <nvToolsExt.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/nvtx.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag nvtx_dso_flag;
extern void *nvtx_dso_handle; #define PLATFORM_DECLARE_DYNAMIC_LOAD_NVTX_WRAP(__name) \
using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
#define DECLARE_DYNAMIC_LOAD_NVTX_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
int operator()(Args... args) { \
using nvtxFunc = decltype(&::__name); \
std::call_once(nvtx_dso_flag, []() { \
nvtx_dso_handle = paddle::platform::dynload::GetNvtxDsoHandle(); \
}); \
static void *p_##__name = dlsym(nvtx_dso_handle, #__name); \
return reinterpret_cast<nvtxFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define NVTX_ROUTINE_EACH(__macro) \ #define NVTX_ROUTINE_EACH(__macro) \
__macro(nvtxRangePushA); \ __macro(nvtxRangePushA); \
__macro(nvtxRangePop); __macro(nvtxRangePop);
NVTX_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NVTX_WRAP); NVTX_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_NVTX_WRAP);
#undef DECLARE_DYNAMIC_LOAD_NVTX_WRAP #undef PLATFORM_DECLARE_DYNAMIC_LOAD_NVTX_WRAP
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
......
...@@ -18,9 +18,6 @@ namespace paddle { ...@@ -18,9 +18,6 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag rccl_dso_flag;
void *rccl_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
RCCL_RAND_ROUTINE_EACH(DEFINE_WRAP); RCCL_RAND_ROUTINE_EACH(DEFINE_WRAP);
......
...@@ -16,28 +16,14 @@ limitations under the License. */ ...@@ -16,28 +16,14 @@ limitations under the License. */
#include <rccl.h> #include <rccl.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/rccl.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag rccl_dso_flag; #define PLATFORM_DECLARE_DYNAMIC_LOAD_RCCL_WRAP(__name) \
extern void* rccl_dso_handle; using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
#define DECLARE_DYNAMIC_LOAD_RCCL_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> decltype(__name(args...)) { \
using nccl_func = decltype(&::__name); \
std::call_once(rccl_dso_flag, []() { \
rccl_dso_handle = paddle::platform::dynload::GetNCCLDsoHandle(); \
}); \
static void* p_##__name = dlsym(rccl_dso_handle, #__name); \
return reinterpret_cast<nccl_func>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define RCCL_RAND_ROUTINE_EACH(__macro) \ #define RCCL_RAND_ROUTINE_EACH(__macro) \
...@@ -57,18 +43,18 @@ extern void* rccl_dso_handle; ...@@ -57,18 +43,18 @@ extern void* rccl_dso_handle;
__macro(ncclReduceScatter); \ __macro(ncclReduceScatter); \
__macro(ncclGetErrorString); __macro(ncclGetErrorString);
RCCL_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_RCCL_WRAP) RCCL_RAND_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
#if NCCL_VERSION_CODE >= 2212 #if NCCL_VERSION_CODE >= 2212
#define RCCL_RAND_ROUTINE_EACH_AFTER_2212(__macro) __macro(ncclBroadcast); #define RCCL_RAND_ROUTINE_EACH_AFTER_2212(__macro) __macro(ncclBroadcast);
RCCL_RAND_ROUTINE_EACH_AFTER_2212(DECLARE_DYNAMIC_LOAD_RCCL_WRAP) RCCL_RAND_ROUTINE_EACH_AFTER_2212(PLATFORM_DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
#endif #endif
#if NCCL_VERSION_CODE >= 2703 #if NCCL_VERSION_CODE >= 2703
#define RCCL_RAND_ROUTINE_EACH_AFTER_2703(__macro) \ #define RCCL_RAND_ROUTINE_EACH_AFTER_2703(__macro) \
__macro(ncclSend); \ __macro(ncclSend); \
__macro(ncclRecv); __macro(ncclRecv);
RCCL_RAND_ROUTINE_EACH_AFTER_2703(DECLARE_DYNAMIC_LOAD_RCCL_WRAP) RCCL_RAND_ROUTINE_EACH_AFTER_2703(PLATFORM_DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
#endif #endif
} // namespace dynload } // namespace dynload
......
...@@ -17,8 +17,6 @@ limitations under the License. */ ...@@ -17,8 +17,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag rocblas_dso_flag;
void *rocblas_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
......
...@@ -19,16 +19,12 @@ limitations under the License. */ ...@@ -19,16 +19,12 @@ limitations under the License. */
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include <type_traits> #include <type_traits>
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/rocblas.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag rocblas_dso_flag;
extern void *rocblas_dso_handle;
/** /**
* The following macro definition can generate structs * The following macro definition can generate structs
* (for each function) to dynamic load cublas routine * (for each function) to dynamic load cublas routine
...@@ -36,18 +32,8 @@ extern void *rocblas_dso_handle; ...@@ -36,18 +32,8 @@ extern void *rocblas_dso_handle;
* *
* note: default dynamic linked libs * note: default dynamic linked libs
*/ */
#define DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP(__name) \ #define PLATFORM_DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \
rocblas_status operator()(Args... args) { \
using rocblas_func = decltype(&::__name); \
std::call_once(rocblas_dso_flag, []() { \
rocblas_dso_handle = paddle::platform::dynload::GetCublasDsoHandle(); \
}); \
static void *p_##__name = dlsym(rocblas_dso_handle, #__name); \
return reinterpret_cast<rocblas_func>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define ROCBLAS_BLAS_ROUTINE_EACH(__macro) \ #define ROCBLAS_BLAS_ROUTINE_EACH(__macro) \
...@@ -83,7 +69,7 @@ extern void *rocblas_dso_handle; ...@@ -83,7 +69,7 @@ extern void *rocblas_dso_handle;
__macro(rocblas_set_pointer_mode); \ __macro(rocblas_set_pointer_mode); \
__macro(rocblas_get_pointer_mode); __macro(rocblas_get_pointer_mode);
ROCBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP) ROCBLAS_BLAS_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
// APIs available after CUDA 8.0 // APIs available after CUDA 8.0
#define ROCBLAS_BLAS_ROUTINE_EACH_R2(__macro) \ #define ROCBLAS_BLAS_ROUTINE_EACH_R2(__macro) \
...@@ -94,21 +80,21 @@ ROCBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP) ...@@ -94,21 +80,21 @@ ROCBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
__macro(rocblas_zgemm_strided_batched); \ __macro(rocblas_zgemm_strided_batched); \
__macro(rocblas_hgemm_strided_batched); __macro(rocblas_hgemm_strided_batched);
ROCBLAS_BLAS_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP) ROCBLAS_BLAS_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
// HIP not supported in ROCM3.5 // HIP not supported in ROCM3.5
// #define ROCBLAS_BLAS_ROUTINE_EACH_R3(__macro) // #define ROCBLAS_BLAS_ROUTINE_EACH_R3(__macro)
// __macro(cublasSetMathMode); // __macro(cublasSetMathMode);
// __macro(cublasGetMathMode); // __macro(cublasGetMathMode);
// ROCBLAS_BLAS_ROUTINE_EACH_R3(DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP) // ROCBLAS_BLAS_ROUTINE_EACH_R3(PLATFORM_DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
#define ROCBLAS_BLAS_ROUTINE_EACH_R4(__macro) \ #define ROCBLAS_BLAS_ROUTINE_EACH_R4(__macro) \
__macro(rocblas_gemm_batched_ex); \ __macro(rocblas_gemm_batched_ex); \
__macro(rocblas_gemm_strided_batched_ex); __macro(rocblas_gemm_strided_batched_ex);
ROCBLAS_BLAS_ROUTINE_EACH_R4(DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP) ROCBLAS_BLAS_ROUTINE_EACH_R4(PLATFORM_DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP)
#undef DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP #undef PLATFORM_DECLARE_DYNAMIC_LOAD_ROCBLAS_WRAP
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -13,22 +13,17 @@ See the License for the specific language governing permissions and ...@@ -13,22 +13,17 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/dynload/rocm_driver.h" #include "paddle/fluid/platform/dynload/rocm_driver.h"
#include "paddle/pten/backends/dynload/rocm_driver.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag rocm_dso_flag;
void* rocm_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
ROCM_ROUTINE_EACH(DEFINE_WRAP); ROCM_ROUTINE_EACH(DEFINE_WRAP);
bool HasCUDADriver() { bool HasCUDADriver() { return pten::dynload::HasCUDADriver(); }
std::call_once(rocm_dso_flag, []() { rocm_dso_handle = GetCUDADsoHandle(); });
return rocm_dso_handle != nullptr;
}
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -17,30 +17,17 @@ limitations under the License. */ ...@@ -17,30 +17,17 @@ limitations under the License. */
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/rocm_driver.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag rocm_dso_flag;
extern void* rocm_dso_handle;
extern bool HasCUDADriver(); extern bool HasCUDADriver();
#define DECLARE_DYNAMIC_LOAD_ROCM_WRAP(__name) \ #define PLATFORM_DECLARE_DYNAMIC_LOAD_ROCM_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \ extern DynLoad__##__name __name
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using rocm_func = decltype(&::__name); \
std::call_once(rocm_dso_flag, []() { \
rocm_dso_handle = paddle::platform::dynload::GetCUDADsoHandle(); \
}); \
static void* p_##__name = dlsym(rocm_dso_handle, #__name); \
return reinterpret_cast<rocm_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name
/** /**
* include all needed cuda driver functions * include all needed cuda driver functions
...@@ -59,9 +46,9 @@ extern bool HasCUDADriver(); ...@@ -59,9 +46,9 @@ extern bool HasCUDADriver();
__macro(hipGetDeviceCount); \ __macro(hipGetDeviceCount); \
__macro(hipDevicePrimaryCtxGetState) __macro(hipDevicePrimaryCtxGetState)
ROCM_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_ROCM_WRAP); ROCM_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_ROCM_WRAP);
#undef DECLARE_DYNAMIC_LOAD_ROCM_WRAP #undef PLATFORM_DECLARE_DYNAMIC_LOAD_ROCM_WRAP
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
......
...@@ -18,9 +18,6 @@ namespace paddle { ...@@ -18,9 +18,6 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag warpctc_dso_flag;
void* warpctc_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
WARPCTC_ROUTINE_EACH(DEFINE_WRAP); WARPCTC_ROUTINE_EACH(DEFINE_WRAP);
......
...@@ -16,34 +16,19 @@ limitations under the License. */ ...@@ -16,34 +16,19 @@ limitations under the License. */
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/pten/backends/dynload/warpctc.h"
#include "paddle/fluid/platform/port.h"
#include "warpctc/include/ctc.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
extern std::once_flag warpctc_dso_flag;
extern void* warpctc_dso_handle;
/** /**
* The following macro definition can generate structs * The following macro definition can generate structs
* (for each function) to dynamic load warpctc routine * (for each function) to dynamic load warpctc routine
* via operator overloading. * via operator overloading.
*/ */
#define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \ #define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
struct DynLoad__##__name { \ using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using warpctcFunc = decltype(&::__name); \
std::call_once(warpctc_dso_flag, []() { \
warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle(); \
}); \
static void* p_##_name = dlsym(warpctc_dso_handle, #__name); \
return reinterpret_cast<warpctcFunc>(p_##_name)(args...); \
} \
}; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define DECLARE_DYNAMIC_LOAD_WARPCTC_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
......
...@@ -65,30 +65,30 @@ limitations under the License. */ ...@@ -65,30 +65,30 @@ limitations under the License. */
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/variant.h" #include "paddle/fluid/platform/variant.h"
#include "paddle/fluid/string/printf.h" #include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/to_string.h" #include "paddle/fluid/string/to_string.h"
#include "paddle/pten/backends/dynload/port.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/dynload/cublas.h" #include "paddle/pten/backends/dynload/cublas.h"
#include "paddle/fluid/platform/dynload/cudnn.h" #include "paddle/pten/backends/dynload/cudnn.h"
#include "paddle/fluid/platform/dynload/curand.h" #include "paddle/pten/backends/dynload/curand.h"
#include "paddle/fluid/platform/dynload/cusolver.h" #include "paddle/pten/backends/dynload/cusolver.h"
#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
#include <error.h> #include <error.h>
#include "paddle/fluid/platform/dynload/nccl.h" #include "paddle/pten/backends/dynload/nccl.h"
#endif // __APPLE__ #endif // __APPLE__
#endif // PADDLE_WITH_CUDA #endif // PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/dynload/hipfft.h" #include "paddle/pten/backends/dynload/hipfft.h"
#include "paddle/fluid/platform/dynload/hiprand.h" #include "paddle/pten/backends/dynload/hiprand.h"
#include "paddle/fluid/platform/dynload/miopen.h" #include "paddle/pten/backends/dynload/miopen.h"
#include "paddle/fluid/platform/dynload/rocblas.h" #include "paddle/pten/backends/dynload/rocblas.h"
#if !defined(__APPLE__) && defined(PADDLE_WITH_RCCL) #if !defined(__APPLE__) && defined(PADDLE_WITH_RCCL)
#include <error.h> // NOLINT #include <error.h> // NOLINT
#include "paddle/fluid/platform/dynload/rccl.h" #include "paddle/pten/backends/dynload/rccl.h"
#endif // __APPLE__ #endif // __APPLE__
#endif // PADDLE_WITH_HIP #endif // PADDLE_WITH_HIP
...@@ -880,7 +880,7 @@ inline bool is_error(cudnnStatus_t stat) { ...@@ -880,7 +880,7 @@ inline bool is_error(cudnnStatus_t stat) {
inline std::string build_nvidia_error_msg(cudnnStatus_t stat) { inline std::string build_nvidia_error_msg(cudnnStatus_t stat) {
std::ostringstream sout; std::ostringstream sout;
sout << "CUDNN error(" << stat << "), " sout << "CUDNN error(" << stat << "), "
<< platform::dynload::cudnnGetErrorString(stat) << ". " << pten::dynload::cudnnGetErrorString(stat) << ". "
<< GetExternalErrorMsg(stat); << GetExternalErrorMsg(stat);
return sout.str(); return sout.str();
} }
...@@ -945,7 +945,7 @@ inline bool is_error(ncclResult_t nccl_result) { ...@@ -945,7 +945,7 @@ inline bool is_error(ncclResult_t nccl_result) {
inline std::string build_nvidia_error_msg(ncclResult_t nccl_result) { inline std::string build_nvidia_error_msg(ncclResult_t nccl_result) {
std::ostringstream sout; std::ostringstream sout;
sout << "NCCL error(" << nccl_result << "), " sout << "NCCL error(" << nccl_result << "), "
<< platform::dynload::ncclGetErrorString(nccl_result) << ". "; << pten::dynload::ncclGetErrorString(nccl_result) << ". ";
if (errno == ENOSPC || errno == EAGAIN) { if (errno == ENOSPC || errno == EAGAIN) {
std::string detail(strerror(errno)); std::string detail(strerror(errno));
detail += "\nPlease try one of the following solutions:"; detail += "\nPlease try one of the following solutions:";
...@@ -1090,7 +1090,7 @@ inline bool is_error(miopenStatus_t stat) { ...@@ -1090,7 +1090,7 @@ inline bool is_error(miopenStatus_t stat) {
inline std::string build_rocm_error_msg(miopenStatus_t stat) { inline std::string build_rocm_error_msg(miopenStatus_t stat) {
std::string msg(" Miopen error, "); std::string msg(" Miopen error, ");
return msg + platform::dynload::miopenGetErrorString(stat) + " "; return msg + pten::dynload::miopenGetErrorString(stat) + " ";
} }
/***** ROCBLAS ERROR *****/ /***** ROCBLAS ERROR *****/
...@@ -1132,7 +1132,7 @@ inline bool is_error(ncclResult_t nccl_result) { ...@@ -1132,7 +1132,7 @@ inline bool is_error(ncclResult_t nccl_result) {
inline std::string build_rocm_error_msg(ncclResult_t nccl_result) { inline std::string build_rocm_error_msg(ncclResult_t nccl_result) {
std::string msg(" Rccl error, "); std::string msg(" Rccl error, ");
return msg + platform::dynload::ncclGetErrorString(nccl_result) + " "; return msg + pten::dynload::ncclGetErrorString(nccl_result) + " ";
} }
#endif // not(__APPLE__) and PADDLE_WITH_NCCL #endif // not(__APPLE__) and PADDLE_WITH_NCCL
...@@ -1141,7 +1141,7 @@ inline bool is_error(hipfftResult_t stat) { return stat != HIPFFT_SUCCESS; } ...@@ -1141,7 +1141,7 @@ inline bool is_error(hipfftResult_t stat) { return stat != HIPFFT_SUCCESS; }
inline std::string build_rocm_error_msg(hipfftResult_t stat) { inline std::string build_rocm_error_msg(hipfftResult_t stat) {
std::string msg(" HIPFFT error, "); std::string msg(" HIPFFT error, ");
return msg + platform::dynload::hipfftGetErrorString(stat) + " "; return msg + pten::dynload::hipfftGetErrorString(stat) + " ";
} }
namespace details { namespace details {
......
...@@ -24,6 +24,8 @@ limitations under the License. */ ...@@ -24,6 +24,8 @@ limitations under the License. */
#include <unistd.h> #include <unistd.h>
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
#include <processthreadsapi.h> #include <processthreadsapi.h>
#else
#include <unistd.h>
#endif #endif
#include "paddle/fluid/platform/macros.h" // import DISABLE_COPY_AND_ASSIGN #include "paddle/fluid/platform/macros.h" // import DISABLE_COPY_AND_ASSIGN
......
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
#ifdef _POSIX_C_SOURCE #ifdef _POSIX_C_SOURCE
#include <time.h> #include <time.h>
#endif #endif
#include "paddle/fluid/platform/port.h" #include "paddle/pten/backends/dynload/port.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#pragma once #pragma once
#include <stdlib.h> #include <stdlib.h>
#include "paddle/fluid/platform/port.h" #include "paddle/pten/backends/dynload/port.h"
#ifdef _WIN32 #ifdef _WIN32
static unsigned sleep(unsigned seconds) { static unsigned sleep(unsigned seconds) {
......
add_subdirectory(dynload)
add_subdirectory(cpu) add_subdirectory(cpu)
cc_library(pten_context SRCS all_context.cc DEPS device_context) cc_library(pten_context SRCS all_context.cc DEPS device_context)
# Build rules for the pten dynload wrappers (moved from fluid/platform/dynload).
# Core loader: locates and dlopen()s vendor shared libraries at runtime.
cc_library(pten_dynamic_loader SRCS dynamic_loader.cc DEPS enforce glog gflags)
# CUDA-toolkit libraries that are resolved lazily through the loader.
list(APPEND CUDA_SRCS cublas.cc cublasLt.cc cudnn.cc curand.cc cusolver.cc cusparse.cc nvtx.cc cufft.cc)
# nvjpeg is not available in the NV Jetson toolchain.
if (NOT WITH_NV_JETSON)
list(APPEND CUDA_SRCS nvjpeg.cc)
endif()
if (WITH_ROCM)
list(APPEND HIP_SRCS rocblas.cc miopen.cc hiprand.cc hipfft.cc)
endif()
# There is no macOS version of NCCL.
# Disable nvrtc and cuda_driver api on MacOS, and only do a early test on Linux and Windows.
if (NOT APPLE)
list(APPEND CUDA_SRCS nvrtc.cc cuda_driver.cc)
if (WITH_NCCL)
list(APPEND CUDA_SRCS nccl.cc)
endif()
if (WITH_ROCM)
list(APPEND HIP_SRCS hiprtc.cc rocm_driver.cc)
if (WITH_RCCL)
list(APPEND HIP_SRCS rccl.cc)
endif()
endif()
endif()
if (TENSORRT_FOUND)
list(APPEND CUDA_SRCS tensorrt.cc)
endif()
# Bakes the CUPTI library search path discovered at configure time into a header.
configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h)
if (CUPTI_FOUND)
list(APPEND CUDA_SRCS cupti.cc)
endif(CUPTI_FOUND)
# GPU wrapper library: HIP sources on ROCm builds, CUDA sources otherwise;
# warpctc is built in every configuration.
if(WITH_ROCM)
hip_library(pten_dynload_cuda SRCS ${HIP_SRCS} DEPS pten_dynamic_loader)
cc_library(pten_dynload_warpctc SRCS warpctc.cc DEPS pten_dynamic_loader warpctc)
elseif (WITH_ASCEND_CL)
cc_library(pten_dynload_warpctc SRCS warpctc.cc DEPS pten_dynamic_loader warpctc npu_hccl)
else()
nv_library(pten_dynload_cuda SRCS ${CUDA_SRCS} DEPS pten_dynamic_loader)
cc_library(pten_dynload_warpctc SRCS warpctc.cc DEPS pten_dynamic_loader warpctc)
endif()
if (WITH_MKLML)
cc_library(pten_dynload_mklml SRCS mklml.cc DEPS pten_dynamic_loader mklml)
endif()
cc_library(pten_dynload_lapack SRCS lapack.cc DEPS pten_dynamic_loader)
add_dependencies(pten_dynload_lapack extern_lapack)
# TODO(TJ): add iomp, mkldnn?
if (MKL_FOUND AND WITH_ONEMKL)
message("ONEMKL INCLUDE directory is ${MKL_INCLUDE}")
cc_library(pten_dynload_mklrt SRCS mklrt.cc DEPS pten_dynamic_loader)
target_include_directories(pten_dynload_mklrt PRIVATE ${MKL_INCLUDE})
endif()
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cublas.h"

namespace pten {
namespace dynload {

// Guard and handle for lazily dlopen()-ing the cuBLAS shared library.
// The handle is filled in exactly once by the first wrapped cuBLAS call.
std::once_flag cublas_dso_flag;
void *cublas_dso_handle = nullptr;

// Instantiates the DynLoad__<name> functor objects declared in cublas.h.
#define DEFINE_WRAP(__name) DynLoad__##__name __name

CUBLAS_BLAS_ROUTINE_EACH(DEFINE_WRAP);

// The R2/R3/R4 groups exist only when cublas.h defined them for the
// CUDA toolkit version this build targets.
#ifdef CUBLAS_BLAS_ROUTINE_EACH_R2
CUBLAS_BLAS_ROUTINE_EACH_R2(DEFINE_WRAP);
#endif

#ifdef CUBLAS_BLAS_ROUTINE_EACH_R3
CUBLAS_BLAS_ROUTINE_EACH_R3(DEFINE_WRAP);
#endif

#ifdef CUBLAS_BLAS_ROUTINE_EACH_R4
CUBLAS_BLAS_ROUTINE_EACH_R4(DEFINE_WRAP);
#endif

}  // namespace dynload
}  // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

#include <cublasXt.h>
#include <cublas_v2.h>
#include <cuda.h>
#include <mutex>  // NOLINT
#include <type_traits>

#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"

namespace pten {
namespace dynload {

// Shared once-flag and dlopen() handle for the cuBLAS library; defined in
// cublas.cc and used by every generated wrapper below.
extern std::once_flag cublas_dso_flag;
extern void *cublas_dso_handle;

/**
 * The following macro definition can generate structs
 * (for each function) to dynamic load cublas routine
 * via operator overloading.
 *
 * note: default dynamic linked libs
 */
// Each expansion declares a functor struct whose operator() loads the
// library once (std::call_once), resolves the symbol with dlsym, caches the
// pointer in a function-local static, and forwards the call.
#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name)                             \
  struct DynLoad__##__name {                                                 \
    template <typename... Args>                                              \
    inline auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) {  \
      using cublas_func =                                                    \
          decltype(::__name(std::declval<Args>()...)) (*)(Args...);          \
      std::call_once(cublas_dso_flag, []() {                                 \
        cublas_dso_handle = pten::dynload::GetCublasDsoHandle();             \
      });                                                                    \
      static void *p_##__name = dlsym(cublas_dso_handle, #__name);           \
      return reinterpret_cast<cublas_func>(p_##__name)(args...);             \
    }                                                                        \
  };                                                                         \
  extern DynLoad__##__name __name

// Baseline cuBLAS routines wrapped for every supported CUDA version.
#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \
  __macro(cublasSaxpy_v2);                \
  __macro(cublasDaxpy_v2);                \
  __macro(cublasCaxpy_v2);                \
  __macro(cublasZaxpy_v2);                \
  __macro(cublasSscal_v2);                \
  __macro(cublasDscal_v2);                \
  __macro(cublasScopy_v2);                \
  __macro(cublasDcopy_v2);                \
  __macro(cublasSgemv_v2);                \
  __macro(cublasDgemv_v2);                \
  __macro(cublasCgemv_v2);                \
  __macro(cublasZgemv_v2);                \
  __macro(cublasSgemm_v2);                \
  __macro(cublasDgemm_v2);                \
  __macro(cublasCgemm_v2);                \
  __macro(cublasZgemm_v2);                \
  __macro(cublasHgemm);                   \
  __macro(cublasSgemmEx);                 \
  __macro(cublasSgeam);                   \
  __macro(cublasDgeam);                   \
  __macro(cublasStrsm_v2);                \
  __macro(cublasDtrsm_v2);                \
  __macro(cublasCtrsm_v2);                \
  __macro(cublasZtrsm_v2);                \
  __macro(cublasCreate_v2);               \
  __macro(cublasDestroy_v2);              \
  __macro(cublasSetStream_v2);            \
  __macro(cublasSetPointerMode_v2);       \
  __macro(cublasGetPointerMode_v2);       \
  __macro(cublasSgemmBatched);            \
  __macro(cublasDgemmBatched);            \
  __macro(cublasCgemmBatched);            \
  __macro(cublasZgemmBatched);            \
  __macro(cublasStrsmBatched);            \
  __macro(cublasDtrsmBatched);            \
  __macro(cublasCtrsmBatched);            \
  __macro(cublasZtrsmBatched);            \
  __macro(cublasSgetrfBatched);           \
  __macro(cublasSgetriBatched);           \
  __macro(cublasDgetrfBatched);           \
  __macro(cublasDgetriBatched);           \
  __macro(cublasSmatinvBatched);          \
  __macro(cublasDmatinvBatched);          \
  __macro(cublasSgetrsBatched);           \
  __macro(cublasDgetrsBatched);

CUBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)

// APIs available after CUDA 8.0
#if CUDA_VERSION >= 8000
#define CUBLAS_BLAS_ROUTINE_EACH_R2(__macro) \
  __macro(cublasGemmEx);                     \
  __macro(cublasSgemmStridedBatched);        \
  __macro(cublasDgemmStridedBatched);        \
  __macro(cublasCgemmStridedBatched);        \
  __macro(cublasZgemmStridedBatched);        \
  __macro(cublasHgemmStridedBatched);

CUBLAS_BLAS_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
#endif

// APIs available after CUDA 9.0
#if CUDA_VERSION >= 9000
#define CUBLAS_BLAS_ROUTINE_EACH_R3(__macro) \
  __macro(cublasSetMathMode);                \
  __macro(cublasGetMathMode);

CUBLAS_BLAS_ROUTINE_EACH_R3(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
#endif

// APIs available after CUDA 9.1
#if CUDA_VERSION >= 9010
#define CUBLAS_BLAS_ROUTINE_EACH_R4(__macro) \
  __macro(cublasGemmBatchedEx);              \
  __macro(cublasGemmStridedBatchedEx);

CUBLAS_BLAS_ROUTINE_EACH_R4(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP)
#endif

#undef DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP

}  // namespace dynload
}  // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cublasLt.h"

namespace pten {
namespace dynload {

// Guard and handle for lazily dlopen()-ing the cublasLt shared library.
std::once_flag cublasLt_dso_flag;
void *cublasLt_dso_handle = nullptr;

// Instantiates the DynLoad__<name> functor objects declared in cublasLt.h.
#define DEFINE_WRAP(__name) DynLoad__##__name __name

CUBLASLT_BLAS_ROUTINE_EACH(DEFINE_WRAP);

}  // namespace dynload
}  // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

#include <cublasLt.h>
#include <cuda.h>
#include <mutex>  // NOLINT
#include <type_traits>

#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"

namespace pten {
namespace dynload {

// Shared once-flag and dlopen() handle for cublasLt; defined in cublasLt.cc.
extern std::once_flag cublasLt_dso_flag;
extern void *cublasLt_dso_handle;

/**
 * The following macro definition can generate structs
 * (for each function) to dynamic load cublasLt routine
 * via operator overloading.
 *
 * note: default dynamic linked libs
 */
// Each expansion declares a functor whose operator() opens the library once,
// resolves the symbol via dlsym, caches it in a local static, and forwards.
#define DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP(__name)                           \
  struct DynLoad__##__name {                                                 \
    template <typename... Args>                                              \
    inline auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) {  \
      using cublasLt_func =                                                  \
          decltype(::__name(std::declval<Args>()...)) (*)(Args...);          \
      std::call_once(cublasLt_dso_flag, []() {                               \
        cublasLt_dso_handle = pten::dynload::GetCublasLtDsoHandle();         \
      });                                                                    \
      static void *p_##__name = dlsym(cublasLt_dso_handle, #__name);         \
      return reinterpret_cast<cublasLt_func>(p_##__name)(args...);           \
    }                                                                        \
  };                                                                         \
  extern DynLoad__##__name __name

// APIs available after CUDA 10.1
// #if CUDA_VERSION >= 10100
#define CUBLASLT_BLAS_ROUTINE_EACH(__macro)      \
  __macro(cublasLtCreate);                       \
  __macro(cublasLtDestroy);                      \
  __macro(cublasLtMatmul);                       \
  __macro(cublasLtMatmulDescCreate);             \
  __macro(cublasLtMatmulDescDestroy);            \
  __macro(cublasLtMatmulDescSetAttribute);       \
  __macro(cublasLtMatrixLayoutCreate);           \
  __macro(cublasLtMatrixLayoutDestroy);          \
  __macro(cublasLtMatrixLayoutSetAttribute);     \
  __macro(cublasLtMatrixTransform);              \
  __macro(cublasLtMatrixTransformDescCreate);    \
  __macro(cublasLtMatrixTransformDescDestroy);   \
  __macro(cublasLtMatrixTransformDescSetAttribute);

CUBLASLT_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP)
// #endif

#undef DECLARE_DYNAMIC_LOAD_CUBLASLT_WRAP
}  // namespace dynload
}  // namespace pten
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cuda_driver.h"

namespace pten {
namespace dynload {

// Guard and handle for lazily dlopen()-ing the CUDA driver library (libcuda).
std::once_flag cuda_dso_flag;
void* cuda_dso_handle = nullptr;

// Instantiates the DynLoad__<name> functor objects declared in cuda_driver.h.
#define DEFINE_WRAP(__name) DynLoad__##__name __name

// Virtual-memory-management entry points only exist on CUDA >= 10.2.
#if CUDA_VERSION >= 10020
CUDA_ROUTINE_EACH_VVM(DEFINE_WRAP);
#endif
CUDA_ROUTINE_EACH(DEFINE_WRAP);
// Reports whether the CUDA driver shared library could be located and
// opened. The dlopen() attempt is made at most once per process; later
// calls only re-check the cached handle.
bool HasCUDADriver() {
  std::call_once(cuda_dso_flag, [] {
    cuda_dso_handle = GetCUDADsoHandle();
  });
  return nullptr != cuda_dso_handle;
}

}  // namespace dynload
}  // namespace pten
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

#include <cuda.h>
#include <mutex>  // NOLINT

#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"

namespace pten {
namespace dynload {

// Shared once-flag and dlopen() handle for libcuda; defined in cuda_driver.cc.
extern std::once_flag cuda_dso_flag;
extern void* cuda_dso_handle;

// True when the CUDA driver library could be opened (see cuda_driver.cc).
extern bool HasCUDADriver();

// Generates a functor struct per driver-API symbol: the library is opened
// once, the symbol resolved with dlsym and cached in a local static, and the
// call forwarded with the original signature.
#define DECLARE_DYNAMIC_LOAD_CUDA_WRAP(__name)                       \
  struct DynLoad__##__name {                                         \
    template <typename... Args>                                      \
    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
      using cuda_func = decltype(&::__name);                         \
      std::call_once(cuda_dso_flag, []() {                           \
        cuda_dso_handle = pten::dynload::GetCUDADsoHandle();         \
      });                                                            \
      static void* p_##__name = dlsym(cuda_dso_handle, #__name);     \
      return reinterpret_cast<cuda_func>(p_##__name)(args...);       \
    }                                                                \
  };                                                                 \
  extern struct DynLoad__##__name __name

/**
 * include all needed cuda driver functions
 **/
#define CUDA_ROUTINE_EACH(__macro)                      \
  __macro(cuInit);                                      \
  __macro(cuDriverGetVersion);                          \
  __macro(cuGetErrorString);                            \
  __macro(cuModuleLoadData);                            \
  __macro(cuModuleGetFunction);                         \
  __macro(cuModuleUnload);                              \
  __macro(cuOccupancyMaxActiveBlocksPerMultiprocessor); \
  __macro(cuLaunchKernel);                              \
  __macro(cuCtxCreate);                                 \
  __macro(cuCtxGetCurrent);                             \
  __macro(cuDeviceGetCount);                            \
  __macro(cuDevicePrimaryCtxGetState);                  \
  __macro(cuDeviceGetAttribute);                        \
  __macro(cuDeviceGet)

// Virtual-memory-management driver APIs, introduced in CUDA 10.2.
#if CUDA_VERSION >= 10020
#define CUDA_ROUTINE_EACH_VVM(__macro)   \
  __macro(cuMemGetAllocationGranularity); \
  __macro(cuMemAddressReserve);           \
  __macro(cuMemCreate);                   \
  __macro(cuMemMap);                      \
  __macro(cuMemSetAccess);                \
  __macro(cuMemUnmap);                    \
  __macro(cuMemRelease);                  \
  __macro(cuMemAddressFree)

CUDA_ROUTINE_EACH_VVM(DECLARE_DYNAMIC_LOAD_CUDA_WRAP);
#endif

CUDA_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDA_WRAP);

#undef DECLARE_DYNAMIC_LOAD_CUDA_WRAP

}  // namespace dynload
}  // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cudnn.h"
#include "paddle/fluid/platform/enforce.h"

namespace pten {
namespace dynload {

// Guard and handle for lazily dlopen()-ing the cuDNN shared library.
std::once_flag cudnn_dso_flag;
void* cudnn_dso_handle = nullptr;

// Instantiates the DynLoad__<name> functor objects declared in cudnn.h.
#define DEFINE_WRAP(__name) DynLoad__##__name __name

CUDNN_DNN_ROUTINE_EACH(DEFINE_WRAP);

// Each version-gated group is expanded only when cudnn.h defined it for the
// cuDNN release this build targets.
#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8
CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(DEFINE_WRAP);
#endif

#ifdef CUDNN_DNN_ROUTINE_EACH_R7
CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP);
#endif

#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7
CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(DEFINE_WRAP);
#endif

#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R7
CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DEFINE_WRAP);
#endif

#ifdef CUDNN_DNN_ROUTINE_EACH_R8
CUDNN_DNN_ROUTINE_EACH_R8(DEFINE_WRAP);
#endif
// Returns true when the cuDNN shared library could be located and opened.
// Loading happens at most once per process; subsequent calls simply test
// the cached handle.
bool HasCUDNN() {
  std::call_once(cudnn_dso_flag, [] {
    cudnn_dso_handle = GetCUDNNDsoHandle();
  });
  return nullptr != cudnn_dso_handle;
}
// Aborts with a PreconditionNotMet error if the cuDNN library handle is
// still null, i.e. the library could not be loaded before invoking fn_name.
void EnforceCUDNNLoaded(const char* fn_name) {
  PADDLE_ENFORCE_NOT_NULL(
      cudnn_dso_handle,
      paddle::platform::errors::PreconditionNotMet(
          "Cannot load cudnn shared library. Cannot invoke method %s.",
          fn_name));
}

}  // namespace dynload
}  // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

#ifdef PADDLE_WITH_CUDA
#include <cudnn.h>
#include <mutex>  // NOLINT

#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"

namespace pten {
namespace dynload {

// Shared once-flag and dlopen() handle for cuDNN; defined in cudnn.cc.
extern std::once_flag cudnn_dso_flag;
extern void* cudnn_dso_handle;

// True when the cuDNN library could be opened (see cudnn.cc).
extern bool HasCUDNN();

// Raises a PreconditionNotMet error if the library failed to load.
extern void EnforceCUDNNLoaded(const char* fn_name);

// Generates a functor struct per cuDNN symbol: open the library once,
// verify it loaded, resolve the symbol with dlsym (cached in a local
// static), then forward the call with the original signature.
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name)                      \
  struct DynLoad__##__name {                                         \
    template <typename... Args>                                      \
    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
      using cudnn_func = decltype(&::__name);                        \
      std::call_once(cudnn_dso_flag, []() {                          \
        cudnn_dso_handle = pten::dynload::GetCUDNNDsoHandle();       \
      });                                                            \
      EnforceCUDNNLoaded(#__name);                                   \
      static void* p_##__name = dlsym(cudnn_dso_handle, #__name);    \
      return reinterpret_cast<cudnn_func>(p_##__name)(args...);      \
    }                                                                \
  };                                                                 \
  extern struct DynLoad__##__name __name

/**
 * include all needed cudnn functions in HPPL
 * different cudnn version has different interfaces
 **/
#define CUDNN_DNN_ROUTINE_EACH(__macro)                   \
  __macro(cudnnSetTensor4dDescriptor);                    \
  __macro(cudnnSetTensor4dDescriptorEx);                  \
  __macro(cudnnSetTensorNdDescriptor);                    \
  __macro(cudnnGetTensorNdDescriptor);                    \
  __macro(cudnnGetConvolutionNdForwardOutputDim);         \
  __macro(cudnnCreateTensorDescriptor);                   \
  __macro(cudnnDestroyTensorDescriptor);                  \
  __macro(cudnnCreateFilterDescriptor);                   \
  __macro(cudnnSetFilter4dDescriptor);                    \
  __macro(cudnnSetFilterNdDescriptor);                    \
  __macro(cudnnGetFilterNdDescriptor);                    \
  __macro(cudnnSetPooling2dDescriptor);                   \
  __macro(cudnnSetPoolingNdDescriptor);                   \
  __macro(cudnnGetPoolingNdDescriptor);                   \
  __macro(cudnnDestroyFilterDescriptor);                  \
  __macro(cudnnCreateConvolutionDescriptor);              \
  __macro(cudnnCreatePoolingDescriptor);                  \
  __macro(cudnnDestroyPoolingDescriptor);                 \
  __macro(cudnnSetConvolution2dDescriptor);               \
  __macro(cudnnDestroyConvolutionDescriptor);             \
  __macro(cudnnSetConvolutionNdDescriptor);               \
  __macro(cudnnGetConvolutionNdDescriptor);               \
  __macro(cudnnDeriveBNTensorDescriptor);                 \
  __macro(cudnnCreateSpatialTransformerDescriptor);       \
  __macro(cudnnSetSpatialTransformerNdDescriptor);        \
  __macro(cudnnDestroySpatialTransformerDescriptor);      \
  __macro(cudnnSpatialTfGridGeneratorForward);            \
  __macro(cudnnSpatialTfGridGeneratorBackward);           \
  __macro(cudnnSpatialTfSamplerForward);                  \
  __macro(cudnnSpatialTfSamplerBackward);                 \
  __macro(cudnnCreate);                                   \
  __macro(cudnnDestroy);                                  \
  __macro(cudnnSetStream);                                \
  __macro(cudnnActivationForward);                        \
  __macro(cudnnActivationBackward);                       \
  __macro(cudnnConvolutionForward);                       \
  __macro(cudnnConvolutionBackwardBias);                  \
  __macro(cudnnGetConvolutionForwardWorkspaceSize);       \
  __macro(cudnnTransformTensor);                          \
  __macro(cudnnPoolingForward);                           \
  __macro(cudnnPoolingBackward);                          \
  __macro(cudnnSoftmaxBackward);                          \
  __macro(cudnnSoftmaxForward);                           \
  __macro(cudnnGetVersion);                               \
  __macro(cudnnFindConvolutionForwardAlgorithmEx);        \
  __macro(cudnnFindConvolutionBackwardFilterAlgorithmEx); \
  __macro(cudnnFindConvolutionBackwardFilterAlgorithm);   \
  __macro(cudnnFindConvolutionBackwardDataAlgorithmEx);   \
  __macro(cudnnGetErrorString);                           \
  __macro(cudnnCreateDropoutDescriptor);                  \
  __macro(cudnnDropoutGetStatesSize);                     \
  __macro(cudnnSetDropoutDescriptor);                     \
  __macro(cudnnRestoreDropoutDescriptor);                 \
  __macro(cudnnCreateRNNDescriptor);                      \
  __macro(cudnnGetRNNParamsSize);                         \
  __macro(cudnnGetRNNWorkspaceSize);                      \
  __macro(cudnnGetRNNTrainingReserveSize);                \
  __macro(cudnnRNNForwardTraining);                       \
  __macro(cudnnRNNBackwardData);                          \
  __macro(cudnnRNNBackwardWeights);                       \
  __macro(cudnnRNNForwardInference);                      \
  __macro(cudnnDestroyDropoutDescriptor);                 \
  __macro(cudnnDestroyRNNDescriptor);                     \
  __macro(cudnnSetTensorNdDescriptorEx);                  \
  __macro(cudnnAddTensor);                                \
  __macro(cudnnConvolutionBackwardData);                  \
  __macro(cudnnConvolutionBackwardFilter);                \
  __macro(cudnnGetConvolutionBackwardFilterWorkspaceSize); \
  __macro(cudnnGetConvolutionBackwardDataWorkspaceSize);  \
  __macro(cudnnBatchNormalizationForwardTraining);        \
  __macro(cudnnBatchNormalizationForwardInference);       \
  __macro(cudnnBatchNormalizationBackward);               \
  __macro(cudnnCreateActivationDescriptor);               \
  __macro(cudnnSetActivationDescriptor);                  \
  __macro(cudnnGetActivationDescriptor);                  \
  __macro(cudnnDestroyActivationDescriptor);              \
  __macro(cudnnSetRNNDescriptor_v6);
CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)

// APIs present in cuDNN 7.x but removed in cuDNN 8.
#if CUDNN_VERSION >= 7000 && CUDNN_VERSION < 8000
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(__macro) \
  __macro(cudnnGetConvolutionBackwardFilterAlgorithm);   \
  __macro(cudnnGetConvolutionForwardAlgorithm);          \
  __macro(cudnnGetConvolutionBackwardDataAlgorithm);     \
  __macro(cudnnSetRNNDescriptor);
CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif

// APIs available since cuDNN 7.0.1.
#if CUDNN_VERSION >= 7001
#define CUDNN_DNN_ROUTINE_EACH_R7(__macro)                \
  __macro(cudnnSetConvolutionGroupCount);                 \
  __macro(cudnnSetConvolutionMathType);                   \
  __macro(cudnnConvolutionBiasActivationForward);         \
  __macro(cudnnCreateCTCLossDescriptor);                  \
  __macro(cudnnDestroyCTCLossDescriptor);                 \
  __macro(cudnnGetCTCLossDescriptor);                     \
  __macro(cudnnSetCTCLossDescriptor);                     \
  __macro(cudnnGetCTCLossWorkspaceSize);                  \
  __macro(cudnnCTCLoss);                                  \
  __macro(cudnnGetConvolutionBackwardDataAlgorithm_v7);   \
  __macro(cudnnGetConvolutionBackwardFilterAlgorithm_v7); \
  __macro(cudnnGetConvolutionForwardAlgorithm_v7);        \
  __macro(cudnnGetConvolutionBackwardFilterAlgorithmMaxCount);
CUDNN_DNN_ROUTINE_EACH_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif

// Variable-sequence-length RNN APIs available since cuDNN 7.2.1.
#if CUDNN_VERSION >= 7201
#define CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(__macro) \
  __macro(cudnnCreateRNNDataDescriptor);             \
  __macro(cudnnDestroyRNNDataDescriptor);            \
  __macro(cudnnSetRNNDataDescriptor);                \
  __macro(cudnnSetRNNPaddingMode);                   \
  __macro(cudnnRNNForwardTrainingEx);                \
  __macro(cudnnRNNBackwardDataEx);                   \
  __macro(cudnnRNNBackwardWeightsEx);                \
  __macro(cudnnRNNForwardInferenceEx);
CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif

// Extended batch-norm APIs available since cuDNN 7.4.1.
#if CUDNN_VERSION >= 7401
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R7(__macro)                    \
  __macro(cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize); \
  __macro(cudnnBatchNormalizationForwardTrainingEx);                \
  __macro(cudnnGetBatchNormalizationBackwardExWorkspaceSize);       \
  __macro(cudnnBatchNormalizationBackwardEx);                       \
  __macro(cudnnGetBatchNormalizationTrainingExReserveSpaceSize);
CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif

// Fused-ops and v8 RNN APIs available since cuDNN 8.0.
#if CUDNN_VERSION >= 8000
#define CUDNN_DNN_ROUTINE_EACH_R8(__macro)            \
  __macro(cudnnSetRNNDescriptor_v8);                  \
  __macro(cudnnCreateFusedOpsPlan);                   \
  __macro(cudnnCreateFusedOpsConstParamPack);         \
  __macro(cudnnCreateFusedOpsVariantParamPack);       \
  __macro(cudnnDestroyFusedOpsPlan);                  \
  __macro(cudnnDestroyFusedOpsConstParamPack);        \
  __macro(cudnnDestroyFusedOpsVariantParamPack);      \
  __macro(cudnnFusedOpsExecute);                      \
  __macro(cudnnSetFusedOpsConstParamPackAttribute);   \
  __macro(cudnnSetFusedOpsVariantParamPackAttribute); \
  __macro(cudnnMakeFusedOpsPlan);
CUDNN_DNN_ROUTINE_EACH_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif

}  // namespace dynload
}  // namespace pten

#endif
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cufft.h"
#include "paddle/fluid/platform/enforce.h"

namespace pten {
namespace dynload {

// Guard and handle for lazily dlopen()-ing the cuFFT shared library.
std::once_flag cufft_dso_flag;
void* cufft_dso_handle = nullptr;

// Instantiates the DynLoad__<name> functor objects declared in cufft.h.
#define DEFINE_WRAP(__name) DynLoad__##__name __name

CUFFT_FFT_ROUTINE_EACH(DEFINE_WRAP);
// Returns true when the cuFFT shared library could be located and opened.
// The dlopen() attempt happens at most once; later calls reuse the handle.
bool HasCUFFT() {
  std::call_once(cufft_dso_flag, [] {
    cufft_dso_handle = GetCUFFTDsoHandle();
  });
  return nullptr != cufft_dso_handle;
}
// Aborts with a PreconditionNotMet error if the cuFFT library handle is
// still null, i.e. the library could not be loaded before invoking fn_name.
void EnforceCUFFTLoaded(const char* fn_name) {
  PADDLE_ENFORCE_NOT_NULL(
      cufft_dso_handle,
      paddle::platform::errors::PreconditionNotMet(
          "Cannot load cufft shared library. Cannot invoke method %s.",
          fn_name));
}

}  // namespace dynload
}  // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

#ifdef PADDLE_WITH_CUDA
#include <cufft.h>
#include <cufftXt.h>
#include <glog/logging.h>
#include <mutex>  // NOLINT

#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"

namespace pten {
namespace dynload {

// Shared once-flag and dlopen() handle for cuFFT; defined in cufft.cc.
extern std::once_flag cufft_dso_flag;
extern void* cufft_dso_handle;

// True when the cuFFT library could be opened (see cufft.cc).
extern bool HasCUFFT();

// Raises a PreconditionNotMet error if the library failed to load.
extern void EnforceCUFFTLoaded(const char* fn_name);

// Generates a functor struct per cuFFT symbol: open the library once,
// verify it loaded, resolve the symbol with dlsym (cached in a local
// static), then forward the call with the original signature.
#define DECLARE_DYNAMIC_LOAD_CUFFT_WRAP(__name)                      \
  struct DynLoad__##__name {                                         \
    template <typename... Args>                                      \
    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
      using cufft_func = decltype(&::__name);                        \
      std::call_once(cufft_dso_flag, []() {                          \
        cufft_dso_handle = pten::dynload::GetCUFFTDsoHandle();       \
      });                                                            \
      EnforceCUFFTLoaded(#__name);                                   \
      static void* p_##__name = dlsym(cufft_dso_handle, #__name);    \
      return reinterpret_cast<cufft_func>(p_##__name)(args...);      \
    }                                                                \
  };                                                                 \
  extern struct DynLoad__##__name __name

/**
 * include all needed cufft functions in HPPL
 * different cufft version has different interfaces
 **/
#define CUFFT_FFT_ROUTINE_EACH(__macro) \
  __macro(cufftPlan1d);                 \
  __macro(cufftPlan2d);                 \
  __macro(cufftPlan3d);                 \
  __macro(cufftPlanMany);               \
  __macro(cufftMakePlan1d);             \
  __macro(cufftMakePlan2d);             \
  __macro(cufftMakePlan3d);             \
  __macro(cufftMakePlanMany);           \
  __macro(cufftMakePlanMany64);         \
  __macro(cufftGetSizeMany64);          \
  __macro(cufftEstimate1d);             \
  __macro(cufftEstimate2d);             \
  __macro(cufftEstimate3d);             \
  __macro(cufftEstimateMany);           \
  __macro(cufftCreate);                 \
  __macro(cufftGetSize1d);              \
  __macro(cufftGetSize2d);              \
  __macro(cufftGetSize3d);              \
  __macro(cufftGetSizeMany);            \
  __macro(cufftGetSize);                \
  __macro(cufftSetWorkArea);            \
  __macro(cufftSetAutoAllocation);      \
  __macro(cufftExecC2C);                \
  __macro(cufftExecR2C);                \
  __macro(cufftExecC2R);                \
  __macro(cufftExecZ2Z);                \
  __macro(cufftExecD2Z);                \
  __macro(cufftExecZ2D);                \
  __macro(cufftSetStream);              \
  __macro(cufftDestroy);                \
  __macro(cufftGetVersion);             \
  __macro(cufftGetProperty);            \
  __macro(cufftXtSetGPUs);              \
  __macro(cufftXtMalloc);               \
  __macro(cufftXtMemcpy);               \
  __macro(cufftXtFree);                 \
  __macro(cufftXtSetWorkArea);          \
  __macro(cufftXtExecDescriptorC2C);    \
  __macro(cufftXtExecDescriptorR2C);    \
  __macro(cufftXtExecDescriptorC2R);    \
  __macro(cufftXtExecDescriptorZ2Z);    \
  __macro(cufftXtExecDescriptorD2Z);    \
  __macro(cufftXtExecDescriptorZ2D);    \
  __macro(cufftXtQueryPlan);            \
  __macro(cufftXtSetCallback);          \
  __macro(cufftXtClearCallback);        \
  __macro(cufftXtSetCallbackSharedSize); \
  __macro(cufftXtMakePlanMany);         \
  __macro(cufftXtGetSizeMany);          \
  __macro(cufftXtExec);                 \
  __macro(cufftXtExecDescriptor);       \
  __macro(cufftXtSetWorkAreaPolicy);
CUFFT_FFT_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUFFT_WRAP)

}  // namespace dynload
}  // namespace pten

#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_CUPTI
#include "paddle/pten/backends/dynload/cupti.h"
namespace pten {
namespace dynload {
// Guards the one-time dlopen of the CUPTI shared library.
std::once_flag cupti_dso_flag;
// Handle filled in lazily by the first wrapped CUPTI call (see cupti.h).
void *cupti_dso_handle = nullptr;
// Instantiates the DynLoad__<name> functor objects declared `extern` in
// cupti.h.
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CUPTI_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace pten
#endif // PADDLE_WITH_CUPTI
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_CUPTI
#include <cuda.h>
#include <cupti.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag cupti_dso_flag;
extern void *cupti_dso_handle;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load cupti routine
* via operator overloading.
*
* note: default dynamic linked libs
*/
// Generates a functor struct DynLoad__<name>: the first invocation dlopen()s
// the CUPTI library (guarded by cupti_dso_flag), each instantiation resolves
// <name> once with dlsym, then forwards the arguments with CUPTI's calling
// convention.
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name)                  \
  struct DynLoad__##__name {                                     \
    template <typename... Args>                                  \
    inline CUptiResult CUPTIAPI operator()(Args... args) {       \
      using cuptiFunc = decltype(&::__name);                     \
      std::call_once(cupti_dso_flag, []() {                      \
        cupti_dso_handle = pten::dynload::GetCUPTIDsoHandle();   \
      });                                                        \
      static void *p_##__name = dlsym(cupti_dso_handle, #__name); \
      return reinterpret_cast<cuptiFunc>(p_##__name)(args...);   \
    }                                                            \
  };                                                             \
  extern DynLoad__##__name __name
// CUPTI activity/callback API entry points resolved at runtime.
#define CUPTI_ROUTINE_EACH(__macro)           \
  __macro(cuptiActivityEnable);               \
  __macro(cuptiActivityDisable);              \
  __macro(cuptiActivityRegisterCallbacks);    \
  __macro(cuptiActivityGetAttribute);         \
  __macro(cuptiActivitySetAttribute);         \
  __macro(cuptiGetTimestamp);                 \
  __macro(cuptiActivityGetNextRecord);        \
  __macro(cuptiGetResultString);              \
  __macro(cuptiActivityGetNumDroppedRecords); \
  __macro(cuptiActivityFlushAll);             \
  __macro(cuptiSubscribe);                    \
  __macro(cuptiUnsubscribe);                  \
  __macro(cuptiEnableCallback);               \
  __macro(cuptiEnableDomain);
CUPTI_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUPTI_WRAP);
// The declaration helper is internal to this header.
#undef DECLARE_DYNAMIC_LOAD_CUPTI_WRAP
} // namespace dynload
} // namespace pten
#endif // PADDLE_WITH_CUPTI
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#define CUPTI_LIB_PATH "@CUPTI_LIBRARY_PATH@"
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/curand.h"
namespace pten {
namespace dynload {
// Guards the one-time dlopen of the cuRAND shared library.
std::once_flag curand_dso_flag;
// Explicit nullptr initialization for consistency with the other dynload
// translation units (cupti.cc, hiprtc.cc, lapack.cc); the handle is filled
// in lazily by the first wrapped cuRAND call.
void *curand_dso_handle = nullptr;

// Instantiates the DynLoad__<name> functor objects declared `extern` in
// curand.h.
#define DEFINE_WRAP(__name) DynLoad__##__name __name

CURAND_RAND_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <curand.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag curand_dso_flag;
extern void *curand_dso_handle;
// Generates a functor struct DynLoad__<name> that lazily dlopen()s libcurand
// (guarded by curand_dso_flag), resolves <name> once with dlsym, and
// forwards the call returning curandStatus_t.
#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name)                  \
  struct DynLoad__##__name {                                      \
    template <typename... Args>                                   \
    curandStatus_t operator()(Args... args) {                     \
      using curandFunc = decltype(&::__name);                     \
      std::call_once(curand_dso_flag, []() {                      \
        curand_dso_handle = pten::dynload::GetCurandDsoHandle();  \
      });                                                         \
      static void *p_##__name = dlsym(curand_dso_handle, #__name); \
      return reinterpret_cast<curandFunc>(p_##__name)(args...);   \
    }                                                             \
  };                                                              \
  extern DynLoad__##__name __name
// cuRAND generator entry points resolved at runtime.
#define CURAND_RAND_ROUTINE_EACH(__macro)      \
  __macro(curandCreateGenerator);              \
  __macro(curandSetStream);                    \
  __macro(curandSetPseudoRandomGeneratorSeed); \
  __macro(curandGenerateUniform);              \
  __macro(curandGenerateUniformDouble);        \
  __macro(curandGenerateNormal);               \
  __macro(curandDestroyGenerator);
CURAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP);
} // namespace dynload
} // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cusolver.h"
namespace pten {
namespace dynload {
// Guards the one-time dlopen of the cuSOLVER shared library.
std::once_flag cusolver_dso_flag;
// Explicit nullptr initialization for consistency with the other dynload
// translation units; filled in lazily by the first wrapped cuSOLVER call.
void *cusolver_dso_handle = nullptr;

// Instantiates the DynLoad__<name> functor objects declared `extern` in
// cusolver.h.
#define DEFINE_WRAP(__name) DynLoad__##__name __name

CUSOLVER_ROUTINE_EACH(DEFINE_WRAP);

// Only defined by cusolver.h when CUDA_VERSION >= 9020.
#ifdef CUSOLVER_ROUTINE_EACH_R1
CUSOLVER_ROUTINE_EACH_R1(DEFINE_WRAP);
#endif

// Only defined by cusolver.h when CUDA_VERSION >= 9020.
#ifdef CUSOLVER_ROUTINE_EACH_R2
CUSOLVER_ROUTINE_EACH_R2(DEFINE_WRAP);
#endif
} // namespace dynload
} // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cuda.h>
#include <cusolverDn.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag cusolver_dso_flag;
extern void *cusolver_dso_handle;
// Generates a functor struct DynLoad__<name> that lazily dlopen()s the
// cuSOLVER library (guarded by cusolver_dso_flag), resolves <name> once with
// dlsym, and forwards the call returning cusolverStatus_t.
#define DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP(__name)                 \
  struct DynLoad__##__name {                                       \
    template <typename... Args>                                    \
    cusolverStatus_t operator()(Args... args) {                    \
      using cusolverFunc = decltype(&::__name);                    \
      std::call_once(cusolver_dso_flag, []() {                     \
        cusolver_dso_handle = pten::dynload::GetCusolverDsoHandle(); \
      });                                                          \
      static void *p_##__name = dlsym(cusolver_dso_handle, #__name); \
      return reinterpret_cast<cusolverFunc>(p_##__name)(args...);  \
    }                                                              \
  };                                                               \
  extern DynLoad__##__name __name
// Baseline dense-solver entry points (Cholesky and symmetric eigensolver)
// available in all supported CUDA versions.
#define CUSOLVER_ROUTINE_EACH(__macro)    \
  __macro(cusolverDnCreate);              \
  __macro(cusolverDnDestroy);             \
  __macro(cusolverDnSetStream);           \
  __macro(cusolverDnSpotrf_bufferSize);   \
  __macro(cusolverDnDpotrf_bufferSize);   \
  __macro(cusolverDnSpotrf);              \
  __macro(cusolverDnDpotrf);              \
  __macro(cusolverDnSpotrs);              \
  __macro(cusolverDnDpotrs);              \
  __macro(cusolverDnCpotrs);              \
  __macro(cusolverDnZpotrs);              \
  __macro(cusolverDnSsyevd_bufferSize);   \
  __macro(cusolverDnDsyevd_bufferSize);   \
  __macro(cusolverDnCheevd_bufferSize);   \
  __macro(cusolverDnZheevd_bufferSize);   \
  __macro(cusolverDnSsyevd);              \
  __macro(cusolverDnDsyevd);              \
  __macro(cusolverDnCheevd);              \
  __macro(cusolverDnZheevd);
CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
#if CUDA_VERSION >= 9020
// Batched Cholesky, LU/QR factorization, and Jacobi SVD entry points
// (CUDA >= 9.2 only).
#define CUSOLVER_ROUTINE_EACH_R1(__macro)  \
  __macro(cusolverDnSpotrfBatched);        \
  __macro(cusolverDnDpotrfBatched);        \
  __macro(cusolverDnSpotrsBatched);        \
  __macro(cusolverDnDpotrsBatched);        \
  __macro(cusolverDnSgesvdj_bufferSize);   \
  __macro(cusolverDnSgetrf_bufferSize);    \
  __macro(cusolverDnDgetrf_bufferSize);    \
  __macro(cusolverDnCgetrf_bufferSize);    \
  __macro(cusolverDnZgetrf_bufferSize);    \
  __macro(cusolverDnSgeqrf_bufferSize);    \
  __macro(cusolverDnDgeqrf_bufferSize);    \
  __macro(cusolverDnCgeqrf_bufferSize);    \
  __macro(cusolverDnZgeqrf_bufferSize);    \
  __macro(cusolverDnSorgqr_bufferSize);    \
  __macro(cusolverDnDorgqr_bufferSize);    \
  __macro(cusolverDnSormqr_bufferSize);    \
  __macro(cusolverDnDormqr_bufferSize);    \
  __macro(cusolverDnCungqr_bufferSize);    \
  __macro(cusolverDnZungqr_bufferSize);    \
  __macro(cusolverDnDestroyGesvdjInfo);    \
  __macro(cusolverDnCreateGesvdjInfo);     \
  __macro(cusolverDnDgesvdj_bufferSize);   \
  __macro(cusolverDnSgesvdj);              \
  __macro(cusolverDnDgesvdj);              \
  __macro(cusolverDnSgetrf);               \
  __macro(cusolverDnDgetrf);               \
  __macro(cusolverDnCgetrf);               \
  __macro(cusolverDnZgetrf);               \
  __macro(cusolverDnSgeqrf);               \
  __macro(cusolverDnDgeqrf);               \
  __macro(cusolverDnCgeqrf);               \
  __macro(cusolverDnZgeqrf);               \
  __macro(cusolverDnSorgqr);               \
  __macro(cusolverDnDorgqr);               \
  __macro(cusolverDnSormqr);               \
  __macro(cusolverDnDormqr);               \
  __macro(cusolverDnCungqr);               \
  __macro(cusolverDnZungqr);
CUSOLVER_ROUTINE_EACH_R1(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
#endif
#if CUDA_VERSION >= 9020
// Jacobi symmetric eigensolver entry points (CUDA >= 9.2 only).
#define CUSOLVER_ROUTINE_EACH_R2(__macro) \
  __macro(cusolverDnCreateSyevjInfo);     \
  __macro(cusolverDnSsyevj_bufferSize);   \
  __macro(cusolverDnDsyevj_bufferSize);   \
  __macro(cusolverDnSsyevj);              \
  __macro(cusolverDnDsyevj);              \
  __macro(cusolverDnDestroySyevjInfo);
CUSOLVER_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
#endif
// The declaration helper is internal to this header.
#undef DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP
} // namespace dynload
} // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/cusparse.h"
namespace pten {
namespace dynload {
// Guards the one-time dlopen of the cuSPARSE shared library.
std::once_flag cusparse_dso_flag;
// Explicit nullptr initialization for consistency with the other dynload
// translation units; filled in lazily by the first wrapped cuSPARSE call.
void *cusparse_dso_handle = nullptr;

// Instantiates the DynLoad__<name> functor objects declared `extern` in
// cusparse.h.
#define DEFINE_WRAP(__name) DynLoad__##__name __name

// Defined by cusparse.h for CUDA >= 10.1.
#ifdef CUSPARSE_ROUTINE_EACH
CUSPARSE_ROUTINE_EACH(DEFINE_WRAP);
#endif

// BUG FIX: this guard previously tested CUBLAS_BLAS_ROUTINE_EACH_R2 (a
// cuBLAS macro that is never defined in this translation unit), so the
// CUDA >= 11.3 cusparseSDDMM* wrappers were declared in cusparse.h but never
// defined here, producing undefined-symbol link errors. Test the cuSPARSE
// macro instead.
#ifdef CUSPARSE_ROUTINE_EACH_R2
CUSPARSE_ROUTINE_EACH_R2(DEFINE_WRAP);
#endif

// Defined by cusparse.h for CUDA >= 11.2.
#ifdef CUSPARSE_ROUTINE_EACH_11020
CUSPARSE_ROUTINE_EACH_11020(DEFINE_WRAP);
#endif
} // namespace dynload
} // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cuda.h>
#include <cusparse.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag cusparse_dso_flag;
extern void *cusparse_dso_handle;
// Generates a functor struct DynLoad__<name> that lazily dlopen()s the
// cuSPARSE library (guarded by cusparse_dso_flag), resolves <name> once with
// dlsym, and forwards the call returning cusparseStatus_t.
#define DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP(__name)                 \
  struct DynLoad__##__name {                                       \
    template <typename... Args>                                    \
    cusparseStatus_t operator()(Args... args) {                    \
      using cusparseFunc = decltype(&::__name);                    \
      std::call_once(cusparse_dso_flag, []() {                     \
        cusparse_dso_handle = pten::dynload::GetCusparseDsoHandle(); \
      });                                                          \
      static void *p_##__name = dlsym(cusparse_dso_handle, #__name); \
      return reinterpret_cast<cusparseFunc>(p_##__name)(args...);  \
    }                                                              \
  };                                                               \
  extern DynLoad__##__name __name
#if defined(PADDLE_WITH_CUDA)
// The generic APIs are supported from CUDA 10.1.
#if CUDA_VERSION >= 10010
#define CUSPARSE_ROUTINE_EACH(__macro) \
  __macro(cusparseCreate);             \
  __macro(cusparseSetStream);          \
  __macro(cusparseCreateMatDescr);     \
  __macro(cusparseDestroy);            \
  __macro(cusparseSnnz);               \
  __macro(cusparseDnnz);               \
  __macro(cusparseSetMatType);         \
  __macro(cusparseSetMatIndexBase);
CUSPARSE_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP);
// APIs available after CUDA 11.2
#if CUDA_VERSION >= 11020
#define CUSPARSE_ROUTINE_EACH_11020(__macro) \
  __macro(cusparseCreateCsr);                \
  __macro(cusparseCreateCoo);                \
  __macro(cusparseCreateDnMat);              \
  __macro(cusparseSpMM_bufferSize);          \
  __macro(cusparseSpMM);                     \
  __macro(cusparseDestroySpMat);             \
  __macro(cusparseDestroyDnMat);             \
  __macro(cusparseCooSetPointers);           \
  __macro(cusparseCsrSetPointers);           \
  __macro(cusparseDenseToSparse_bufferSize); \
  __macro(cusparseDenseToSparse_analysis);   \
  __macro(cusparseDenseToSparse_convert);    \
  __macro(cusparseSparseToDense_bufferSize); \
  __macro(cusparseSparseToDense);
CUSPARSE_ROUTINE_EACH_11020(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
// APIs available after CUDA 11.3
#if CUDA_VERSION >= 11030
#define CUSPARSE_ROUTINE_EACH_R2(__macro) \
  __macro(cusparseSDDMM_bufferSize);      \
  __macro(cusparseSDDMM_preprocess);      \
  __macro(cusparseSDDMM);
CUSPARSE_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
#endif
#endif
#endif
#endif
// The declaration helper is internal to this header.
#undef DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP
} // namespace dynload
} // namespace pten
此差异已折叠。
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
namespace pten {
namespace dynload {
#ifndef _WIN32
// Deduces the return type of a wrapped call expression; used by the
// DECLARE_DYNAMIC_LOAD_*_WRAP macros. On Windows this decltype form is not
// usable, so MSVC falls back to decltype(auto).
#define DECLARE_TYPE(__name, ...) decltype(__name(__VA_ARGS__))
#else
#define DECLARE_TYPE(__name, ...) decltype(auto)
#endif
// Each accessor below returns an OS handle to the named shared library
// (implementations live in dynamic_loader.cc, not shown in this header).
void* GetCublasDsoHandle();
void* GetCublasLtDsoHandle();
void* GetCUDNNDsoHandle();
void* GetCUPTIDsoHandle();
void* GetCurandDsoHandle();
void* GetNvjpegDsoHandle();
void* GetCusolverDsoHandle();
void* GetCusparseDsoHandle();
void* GetNVRTCDsoHandle();
void* GetCUDADsoHandle();
void* GetWarpCTCDsoHandle();
void* GetNCCLDsoHandle();
void* GetHCCLDsoHandle();
void* GetTensorRtDsoHandle();
void* GetMKLMLDsoHandle();
void* GetLAPACKDsoHandle();
// Loads a custom-operator library by file name.
void* GetOpDsoHandle(const std::string& dso_name);
void* GetNvtxDsoHandle();
void* GetCUFFTDsoHandle();
void* GetMKLRTDsoHandle();
void* GetROCFFTDsoHandle();
// Overrides the directory searched when locating the libraries above.
void SetPaddleLibPath(const std::string&);
} // namespace dynload
} // namespace pten
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/hipfft.h"
namespace pten {
namespace dynload {
// Guards the one-time dlopen of the hipFFT shared library.
std::once_flag hipfft_dso_flag;
// Explicit nullptr initialization for consistency with the other dynload
// translation units; filled in lazily by the first wrapped hipFFT call.
void *hipfft_dso_handle = nullptr;

// Instantiates the DynLoad__<name> functor objects declared `extern` in
// hipfft.h.
#define DEFINE_WRAP(__name) DynLoad__##__name __name

HIPFFT_FFT_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace pten
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_HIP
#include <hipfft.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag hipfft_dso_flag;
extern void *hipfft_dso_handle;
// Generates a functor struct DynLoad__<name> that lazily loads the FFT
// library via GetROCFFTDsoHandle() (guarded by hipfft_dso_flag), resolves
// <name> once with dlsym, and forwards the call preserving its return type.
#define DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP(__name)                     \
  struct DynLoad__##__name {                                         \
    template <typename... Args>                                      \
    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
      using hipfftFunc = decltype(&::__name);                        \
      std::call_once(hipfft_dso_flag, []() {                         \
        hipfft_dso_handle = pten::dynload::GetROCFFTDsoHandle();     \
      });                                                            \
      static void *p_##__name = dlsym(hipfft_dso_handle, #__name);   \
      return reinterpret_cast<hipfftFunc>(p_##__name)(args...);      \
    }                                                                \
  };                                                                 \
  extern DynLoad__##__name __name
// hipFFT entry points resolved at runtime.
#define HIPFFT_FFT_ROUTINE_EACH(__macro) \
  __macro(hipfftPlan1d);                 \
  __macro(hipfftPlan2d);                 \
  __macro(hipfftPlan3d);                 \
  __macro(hipfftPlanMany);               \
  __macro(hipfftMakePlan1d);             \
  __macro(hipfftMakePlanMany);           \
  __macro(hipfftMakePlanMany64);         \
  __macro(hipfftGetSizeMany64);          \
  __macro(hipfftEstimate1d);             \
  __macro(hipfftEstimate2d);             \
  __macro(hipfftEstimate3d);             \
  __macro(hipfftEstimateMany);           \
  __macro(hipfftCreate);                 \
  __macro(hipfftGetSize1d);              \
  __macro(hipfftGetSizeMany);            \
  __macro(hipfftGetSize);                \
  __macro(hipfftSetWorkArea);            \
  __macro(hipfftSetAutoAllocation);      \
  __macro(hipfftExecC2C);                \
  __macro(hipfftExecR2C);                \
  __macro(hipfftExecC2R);                \
  __macro(hipfftExecZ2Z);                \
  __macro(hipfftExecD2Z);                \
  __macro(hipfftExecZ2D);                \
  __macro(hipfftSetStream);              \
  __macro(hipfftDestroy);                \
  __macro(hipfftGetVersion);             \
  __macro(hipfftGetProperty);
HIPFFT_FFT_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP);
// Maps a hipfftResult_t status code to a human-readable description for use
// in error messages; unrecognized codes fall through to a generic string.
inline const char *hipfftGetErrorString(hipfftResult_t status) {
  switch (status) {
    case HIPFFT_SUCCESS:
      return "'HIPFFT_SUCCESS'. The hipFFT operation was successful.";
    case HIPFFT_INVALID_PLAN:
      return "'HIPFFT_INVALID_PLAN'. hipFFT was passed an invalid plan handle.";
    case HIPFFT_ALLOC_FAILED:
      return "'HIPFFT_ALLOC_FAILED'. hipFFT failed to allocate GPU or CPU "
             "memory.";
    case HIPFFT_INVALID_TYPE:
      return "'HIPFFT_INVALID_TYPE'. No longer used.";
    case HIPFFT_INVALID_VALUE:
      return "'HIPFFT_INVALID_VALUE'. User specified an invalid pointer or "
             "parameter.";
    case HIPFFT_INTERNAL_ERROR:
      return "'HIPFFT_INTERNAL_ERROR'. Driver or internal hipFFT library "
             "error.";
    case HIPFFT_EXEC_FAILED:
      return "'HIPFFT_EXEC_FAILED'. Failed to execute an FFT on the GPU.";
    case HIPFFT_SETUP_FAILED:
      return "'HIPFFT_SETUP_FAILED'. The hipFFT library failed to initialize.";
    case HIPFFT_INVALID_SIZE:
      return "'HIPFFT_INVALID_SIZE'. User specified an invalid transform size.";
    case HIPFFT_UNALIGNED_DATA:
      return "'HIPFFT_UNALIGNED_DATA'. No longer used.";
    case HIPFFT_INCOMPLETE_PARAMETER_LIST:
      return "'HIPFFT_INCOMPLETE_PARAMETER_LIST'. Missing parameters in call.";
    case HIPFFT_INVALID_DEVICE:
      return "'HIPFFT_INVALID_DEVICE'. Execution of a plan was on different "
             "GPU than plan creation.";
    case HIPFFT_PARSE_ERROR:
      return "'HIPFFT_PARSE_ERROR'. Internal plan database error.";
    case HIPFFT_NO_WORKSPACE:
      return "'HIPFFT_NO_WORKSPACE'. No workspace has been provided prior to "
             "plan execution.";
    case HIPFFT_NOT_IMPLEMENTED:
      return "'HIPFFT_NOT_IMPLEMENTED'. Function does not implement "
             "functionality for parameters given.";
    case HIPFFT_NOT_SUPPORTED:
      return "'HIPFFT_NOT_SUPPORTED'. Operation is not supported for "
             "parameters given.";
    default:
      // Future hipFFT versions may add codes this switch does not know.
      return "HIPFFT_STATUS_UNKNOWN_ERROR";
  }
}
} // namespace dynload
} // namespace pten
#endif
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/hiprand.h"
namespace pten {
namespace dynload {
// Guards the one-time dlopen of the hipRAND shared library.
std::once_flag hiprand_dso_flag;
// Explicit nullptr initialization for consistency with the other dynload
// translation units; filled in lazily by the first wrapped hipRAND call.
void *hiprand_dso_handle = nullptr;

// Instantiates the DynLoad__<name> functor objects declared `extern` in
// hiprand.h.
#define DEFINE_WRAP(__name) DynLoad__##__name __name

HIPRAND_RAND_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace pten
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <hiprand.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag hiprand_dso_flag;
extern void *hiprand_dso_handle;
// NOTE(review): hipRAND reuses the cuRAND wrapper-macro name and
// GetCurandDsoHandle(); on HIP builds the loader is presumably resolving the
// hipRAND library — confirm against dynamic_loader.cc.
#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name)                   \
  struct DynLoad__##__name {                                       \
    template <typename... Args>                                    \
    hiprandStatus_t operator()(Args... args) {                     \
      using hiprandFunc = decltype(&::__name);                     \
      std::call_once(hiprand_dso_flag, []() {                      \
        hiprand_dso_handle = pten::dynload::GetCurandDsoHandle();  \
      });                                                          \
      static void *p_##__name = dlsym(hiprand_dso_handle, #__name); \
      return reinterpret_cast<hiprandFunc>(p_##__name)(args...);   \
    }                                                              \
  };                                                               \
  extern DynLoad__##__name __name
// hipRAND generator entry points resolved at runtime.
#define HIPRAND_RAND_ROUTINE_EACH(__macro)      \
  __macro(hiprandCreateGenerator);              \
  __macro(hiprandSetStream);                    \
  __macro(hiprandSetPseudoRandomGeneratorSeed); \
  __macro(hiprandGenerateUniform);              \
  __macro(hiprandGenerateUniformDouble);        \
  __macro(hiprandGenerateNormal);               \
  __macro(hiprandDestroyGenerator);
HIPRAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP);
} // namespace dynload
} // namespace pten
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/hiprtc.h"
namespace pten {
namespace dynload {
// Guards the one-time dlopen of the hiprtc shared library.
std::once_flag hiprtc_dso_flag;
// Handle filled in lazily by HasNVRTC() or the first wrapped hiprtc call.
void* hiprtc_dso_handle = nullptr;
// Instantiates the DynLoad__<name> functor objects declared `extern` in
// hiprtc.h.
#define DEFINE_WRAP(__name) DynLoad__##__name __name
HIPRTC_ROUTINE_EACH(DEFINE_WRAP);
// Reports whether the runtime-compilation library could be loaded. The
// dlopen happens at most once (guarded by hiprtc_dso_flag); later calls
// simply test the cached handle.
bool HasNVRTC() {
  std::call_once(hiprtc_dso_flag,
                 [] { hiprtc_dso_handle = GetNVRTCDsoHandle(); });
  return hiprtc_dso_handle != nullptr;
}
} // namespace dynload
} // namespace pten
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <hip/hiprtc.h>
#include <mutex> // NOLINT
#include "paddle/pten/backends/dynload/dynamic_loader.h"
#include "paddle/pten/backends/dynload/port.h"
namespace pten {
namespace dynload {
extern std::once_flag hiprtc_dso_flag;
extern void* hiprtc_dso_handle;
extern bool HasNVRTC();
// Generates a functor struct DynLoad__<name> that lazily loads the hiprtc
// library via GetNVRTCDsoHandle() (guarded by hiprtc_dso_flag), resolves
// <name> once with dlsym, and forwards the call preserving its return type.
#define DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP(__name)                     \
  struct DynLoad__##__name {                                         \
    template <typename... Args>                                      \
    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
      using hiprtc_func = decltype(&::__name);                       \
      std::call_once(hiprtc_dso_flag, []() {                         \
        hiprtc_dso_handle = pten::dynload::GetNVRTCDsoHandle();      \
      });                                                            \
      static void* p_##__name = dlsym(hiprtc_dso_handle, #__name);   \
      return reinterpret_cast<hiprtc_func>(p_##__name)(args...);     \
    }                                                                \
  };                                                                 \
  extern struct DynLoad__##__name __name
/**
 * include all needed hiprtc functions
 **/
#define HIPRTC_ROUTINE_EACH(__macro) \
  __macro(hiprtcVersion);            \
  __macro(hiprtcGetErrorString);     \
  __macro(hiprtcCompileProgram);     \
  __macro(hiprtcCreateProgram);      \
  __macro(hiprtcDestroyProgram);     \
  __macro(hiprtcGetCode);            \
  __macro(hiprtcGetCodeSize);        \
  __macro(hiprtcGetProgramLog);      \
  __macro(hiprtcGetProgramLogSize)
HIPRTC_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP);
// The declaration helper is internal to this header.
#undef DECLARE_DYNAMIC_LOAD_HIPRTC_WRAP
} // namespace dynload
} // namespace pten
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/dynload/lapack.h"
#include <mutex>
namespace pten {
namespace dynload {
// Guards the one-time dlopen of the LAPACK shared library.
std::once_flag lapack_dso_flag;
// Handle filled in lazily by the first wrapped LAPACK call (see lapack.h).
void* lapack_dso_handle = nullptr;
// Instantiates the DynLoad__<name> functor objects declared `extern` in
// lapack.h.
#define DEFINE_WRAP(__name) DynLoad__##__name __name
LAPACK_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace pten
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册