提交 5b8fe87f 编写于 作者: L liaogang

dlopen lapacke api and remove gfortran

上级 896b9c55
...@@ -27,33 +27,6 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -27,33 +27,6 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE) SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
ENDIF(WIN32) ENDIF(WIN32)
IF(CMAKE_COMPILER_IS_GNUCC)
    # With a GNU toolchain, locate libgfortran and the thread library and
    # append both to CBLAS_LIBRARIES.
    ENABLE_LANGUAGE(Fortran)
    if (NOT CMAKE_Fortran_COMPILER_VERSION)
        # cmake < 3.4 cannot get CMAKE_Fortran_COMPILER_VERSION directly.
        execute_process(COMMAND ${CMAKE_Fortran_COMPILER} -dumpversion
                        OUTPUT_VARIABLE CMAKE_Fortran_COMPILER_VERSION
                        OUTPUT_STRIP_TRAILING_WHITESPACE)
    endif()

    # Quote the expansion: an empty version string would otherwise drop the
    # input argument and make string(REGEX MATCHALL) fail.
    string(REGEX MATCHALL "[0-9]+" Fortran_VERSION "${CMAKE_Fortran_COMPILER_VERSION}")
    list(LENGTH Fortran_VERSION Fortran_VERSION_LENGTH)

    # Build the search path list from the version components that are actually
    # present. The version may consist of the major number only, in which case
    # list(GET ... 1) would abort the configure step.
    set(GFORTRAN_SEARCH_PATHS /lib /usr/lib)
    if (Fortran_VERSION_LENGTH GREATER 0)
        list(GET Fortran_VERSION 0 Fortran_MAJOR)
        if (Fortran_VERSION_LENGTH GREATER 1)
            list(GET Fortran_VERSION 1 Fortran_MINOR)
            list(APPEND GFORTRAN_SEARCH_PATHS
                 /usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}.${Fortran_MINOR}/)
        endif()
        list(APPEND GFORTRAN_SEARCH_PATHS
             /usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}/)
    endif()

    find_library(GFORTRAN_LIBRARY NAMES gfortran PATHS ${GFORTRAN_SEARCH_PATHS})
    if (NOT GFORTRAN_LIBRARY)
        message(FATAL_ERROR "Cannot found gfortran library which it is used by openblas")
    endif()

    find_package(Threads REQUIRED)
    LIST(APPEND CBLAS_LIBRARIES ${GFORTRAN_LIBRARY} ${CMAKE_THREAD_LIBS_INIT})
ENDIF(CMAKE_COMPILER_IS_GNUCC)
# Abort the configure step when no Fortran compiler has been set.
if(NOT CMAKE_Fortran_COMPILER)
    message(FATAL_ERROR
            "To build lapack in libopenblas, "
            "you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
endif()
ADD_DEFINITIONS(-DPADDLE_USE_LAPACK) ADD_DEFINITIONS(-DPADDLE_USE_LAPACK)
ExternalProject_Add( ExternalProject_Add(
...@@ -64,7 +37,7 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -64,7 +37,7 @@ IF(NOT ${CBLAS_FOUND})
PREFIX ${CBLAS_SOURCES_DIR} PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR} INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1 BUILD_IN_SOURCE 1
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_LAPACK=1 DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib
INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR> INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
......
...@@ -21,16 +21,13 @@ set(CUDA_CXX_WITH_GPU_SOURCES ...@@ -21,16 +21,13 @@ set(CUDA_CXX_WITH_GPU_SOURCES
if(WITH_GPU) if(WITH_GPU)
set(CUDA_CXX_SOURCES set(CUDA_CXX_SOURCES
src/hl_dso_loader.cc
src/hl_warpctc_wrap.cc src/hl_warpctc_wrap.cc
${CUDA_CXX_WITH_GPU_SOURCES}) ${CUDA_CXX_WITH_GPU_SOURCES})
set_source_files_properties(${CUDA_CXX_SOURCES} set_source_files_properties(${CUDA_CXX_SOURCES}
PROPERTIES COMPILE_FLAGS "-D__NVCC__") PROPERTIES COMPILE_FLAGS "-D__NVCC__")
else() else()
set(CUDA_CXX_SOURCES set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc)
src/hl_dso_loader.cc
src/hl_warpctc_wrap.cc)
endif() endif()
set(CUDA_CU_SOURCES set(CUDA_CU_SOURCES
...@@ -47,7 +44,6 @@ set(CUDA_CU_SOURCES ...@@ -47,7 +44,6 @@ set(CUDA_CU_SOURCES
set(CUDA_HEADERS set(CUDA_HEADERS
include/hl_time.h include/hl_time.h
include/hl_dso_loader.h
include/hl_warpctc_wrap.h include/hl_warpctc_wrap.h
include/hl_sequence.h include/hl_sequence.h
include/hl_cuda_cublas.h include/hl_cuda_cublas.h
......
...@@ -16,8 +16,8 @@ limitations under the License. */ ...@@ -16,8 +16,8 @@ limitations under the License. */
#include <sys/time.h> #include <sys/time.h>
#include <mutex> #include <mutex>
#include "hl_cuda.h" #include "hl_cuda.h"
#include "hl_dso_loader.h"
#include "hl_thread.ph" #include "hl_thread.ph"
#include "paddle/utils/DynamicLoad.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
namespace dynload { namespace dynload {
......
...@@ -17,8 +17,8 @@ limitations under the License. */ ...@@ -17,8 +17,8 @@ limitations under the License. */
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <mutex> #include <mutex>
#include "hl_cuda_cudnn.ph" #include "hl_cuda_cudnn.ph"
#include "hl_dso_loader.h"
#include "hl_thread.ph" #include "hl_thread.ph"
#include "paddle/utils/DynamicLoad.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
DEFINE_int32(cudnn_conv_workspace_limit_in_mb, DEFINE_int32(cudnn_conv_workspace_limit_in_mb,
......
...@@ -24,8 +24,8 @@ limitations under the License. */ ...@@ -24,8 +24,8 @@ limitations under the License. */
#include <mutex> #include <mutex>
#include "hl_cuda.ph" #include "hl_cuda.ph"
#include "hl_thread.ph" #include "hl_thread.ph"
#include "hl_dso_loader.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "paddle/utils/DynamicLoad.h"
// clang-format on // clang-format on
namespace dynload { namespace dynload {
...@@ -98,11 +98,11 @@ int g_cuda_lib_version = 0; ...@@ -98,11 +98,11 @@ int g_cuda_lib_version = 0;
* Check build-in cuda function using glog and it **does not** * Check build-in cuda function using glog and it **does not**
* support << operator for more details error info. * support << operator for more details error info.
*/ */
#define CHECK_CUDA(cudaFunc) \ #define CHECK_CUDA(cudaFunc) \
do { \ do { \
cudaError_t cudaStat = cudaFunc; \ cudaError_t cudaStat = cudaFunc; \
CHECK_EQ(cudaSuccess, cudaStat) << "Cuda Error: " \ CHECK_EQ(cudaSuccess, cudaStat) \
<< cudaGetErrorString(cudaStat); \ << "Cuda Error: " << cudaGetErrorString(cudaStat); \
} while (0) } while (0)
/** /**
...@@ -469,8 +469,8 @@ void hl_specify_devices_start(int *device, int number) { ...@@ -469,8 +469,8 @@ void hl_specify_devices_start(int *device, int number) {
CHECK(tmp) << "[Start failed] System memory is not enough."; CHECK(tmp) << "[Start failed] System memory is not enough.";
g_device = (hl_device_prop *)tmp; g_device = (hl_device_prop *)tmp;
device_prop = (hl_device_prop)( device_prop = (hl_device_prop)((char *)tmp + g_system_device_num *
(char *)tmp + g_system_device_num * sizeof(hl_device_prop *)); sizeof(hl_device_prop *));
memset(g_device, 0, g_system_device_num * sizeof(hl_device_prop *)); memset(g_device, 0, g_system_device_num * sizeof(hl_device_prop *));
int num = 0; int num = 0;
for (int i = 0; i < number; i++) { for (int i = 0; i < number; i++) {
...@@ -559,8 +559,8 @@ bool hl_get_sync_flag() { return g_sync_flag; } ...@@ -559,8 +559,8 @@ bool hl_get_sync_flag() { return g_sync_flag; }
void hl_stream_synchronize(hl_stream_t stream) { void hl_stream_synchronize(hl_stream_t stream) {
cudaStream_t cu_stream; cudaStream_t cu_stream;
CHECK_LT(stream, HPPL_STREAM_END) << __func__ CHECK_LT(stream, HPPL_STREAM_END)
<< ": the parameter stream is error."; << __func__ << ": the parameter stream is error.";
cu_stream = t_resource.stream[stream]; cu_stream = t_resource.stream[stream];
CHECK_CUDA(cudaStreamSynchronize(cu_stream)); CHECK_CUDA(cudaStreamSynchronize(cu_stream));
...@@ -590,8 +590,8 @@ void hl_stream_record_event(hl_stream_t stream, hl_event_t event) { ...@@ -590,8 +590,8 @@ void hl_stream_record_event(hl_stream_t stream, hl_event_t event) {
cudaStream_t cu_stream; cudaStream_t cu_stream;
CHECK_NOTNULL(event); CHECK_NOTNULL(event);
CHECK_LT(stream, HPPL_STREAM_END) << __func__ CHECK_LT(stream, HPPL_STREAM_END)
<< ": the parameter stream is error."; << __func__ << ": the parameter stream is error.";
cu_stream = t_resource.stream[stream]; cu_stream = t_resource.stream[stream];
CHECK_CUDA(cudaEventRecord(event->cu_event, cu_stream)); CHECK_CUDA(cudaEventRecord(event->cu_event, cu_stream));
...@@ -601,8 +601,8 @@ void hl_stream_wait_event(hl_stream_t stream, hl_event_t event) { ...@@ -601,8 +601,8 @@ void hl_stream_wait_event(hl_stream_t stream, hl_event_t event) {
cudaStream_t cu_stream; cudaStream_t cu_stream;
CHECK_NOTNULL(event); CHECK_NOTNULL(event);
CHECK_LT(stream, HPPL_STREAM_END) << __func__ CHECK_LT(stream, HPPL_STREAM_END)
<< ": the parameter stream is error."; << __func__ << ": the parameter stream is error.";
cu_stream = t_resource.stream[stream]; cu_stream = t_resource.stream[stream];
CHECK_CUDA(cudaStreamWaitEvent(cu_stream, event->cu_event, 0)); CHECK_CUDA(cudaStreamWaitEvent(cu_stream, event->cu_event, 0));
......
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#include "hl_warpctc_wrap.h" #include "hl_warpctc_wrap.h"
#include <mutex> #include <mutex>
#include "hl_dso_loader.h" #include "paddle/utils/DynamicLoad.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
namespace dynload { namespace dynload {
......
...@@ -15,6 +15,49 @@ limitations under the License. */ ...@@ -15,6 +15,49 @@ limitations under the License. */
#include "MathFunctions.h" #include "MathFunctions.h"
#include "hl_matrix_apply.cuh" #include "hl_matrix_apply.cuh"
#include "hl_matrix_ops.cuh" #include "hl_matrix_ops.cuh"
#include "paddle/utils/DynamicLoad.h"
namespace dynload {

// Ensures GetLapackDsoHandle is invoked only once across all wrapped calls.
std::once_flag lapack_dso_flag;
// Handle of the lapack shared library; filled in by GetLapackDsoHandle.
void* lapack_dso_handle = nullptr;

/**
 * The following macro definition can generate structs
 * (for each function) to dynamic load lapack routine
 * via operator overloading.
 *
 * For a routine `foo` it defines `struct DynLoad__foo` and an object named
 * `foo` in this namespace. Calling `dynload::foo(args...)` loads the lapack
 * DSO on first use, resolves the symbol "foo" with dlsym and forwards the
 * arguments to it. The call aborts through LOG(FATAL) when the symbol cannot
 * be resolved, instead of jumping through a null function pointer.
 *
 * note: default dynamic linked libs
 */
#define DYNAMIC_LOAD_LAPACK_WRAP(__name)                                       \
  struct DynLoad__##__name {                                                   \
    template <typename... Args>                                                \
    auto operator()(Args... args) -> decltype(__name(args...)) {               \
      using lapack_func = decltype(__name(args...)) (*)(Args...);              \
      std::call_once(lapack_dso_flag, GetLapackDsoHandle, &lapack_dso_handle); \
      void* p_##__name = dlsym(lapack_dso_handle, #__name);                    \
      if (p_##__name == nullptr) {                                             \
        LOG(FATAL) << "Cannot find symbol " << #__name                         \
                   << " in the lapack shared library";                         \
      }                                                                        \
      return reinterpret_cast<lapack_func>(p_##__name)(args...);               \
    }                                                                          \
  } __name;  // struct DynLoad__##__name

// clang-format off
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS
  // ATLAS exposes the routines through its clapack_* interface.
  #define LAPACK_ROUTINE_EACH(__macro)  \
    __macro(clapack_sgetrf)             \
    __macro(clapack_dgetrf)             \
    __macro(clapack_sgetri)             \
    __macro(clapack_dgetri)
#else
  // Otherwise the LAPACKE_* C interface is used.
  #define LAPACK_ROUTINE_EACH(__macro)  \
    __macro(LAPACKE_sgetrf)             \
    __macro(LAPACKE_dgetrf)             \
    __macro(LAPACKE_sgetri)             \
    __macro(LAPACKE_dgetri)
#endif

// Instantiate one wrapper object per routine; without this expansion the
// dynload::<routine> objects used by getrf/getri are never defined.
LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP)

#endif
// clang-format on

}  // namespace dynload
namespace paddle { namespace paddle {
...@@ -87,9 +130,9 @@ int getrf<float>(const CBLAS_ORDER order, ...@@ -87,9 +130,9 @@ int getrf<float>(const CBLAS_ORDER order,
int* ipiv) { int* ipiv) {
#ifdef PADDLE_USE_LAPACK #ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS #ifdef PADDLE_USE_ATLAS
return clapack_sgetrf(order, M, N, A, lda, ipiv); return dynload::clapack_sgetrf(order, M, N, A, lda, ipiv);
#else #else
return LAPACKE_sgetrf(order, M, N, A, lda, ipiv); return dynload::LAPACKE_sgetrf(order, M, N, A, lda, ipiv);
#endif #endif
#else #else
LOG(FATAL) << "Not implemented"; LOG(FATAL) << "Not implemented";
...@@ -106,9 +149,9 @@ int getrf<double>(const CBLAS_ORDER order, ...@@ -106,9 +149,9 @@ int getrf<double>(const CBLAS_ORDER order,
int* ipiv) { int* ipiv) {
#ifdef PADDLE_USE_LAPACK #ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS #ifdef PADDLE_USE_ATLAS
return clapack_dgetrf(order, M, N, A, lda, ipiv); return dynload::clapack_dgetrf(order, M, N, A, lda, ipiv);
#else #else
return LAPACKE_dgetrf(order, M, N, A, lda, ipiv); return dynload::LAPACKE_dgetrf(order, M, N, A, lda, ipiv);
#endif #endif
#else #else
LOG(FATAL) << "Not implemented"; LOG(FATAL) << "Not implemented";
...@@ -124,9 +167,9 @@ int getri<float>(const CBLAS_ORDER order, ...@@ -124,9 +167,9 @@ int getri<float>(const CBLAS_ORDER order,
const int* ipiv) { const int* ipiv) {
#ifdef PADDLE_USE_LAPACK #ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS #ifdef PADDLE_USE_ATLAS
return clapack_sgetri(order, N, A, lda, ipiv); return dynload::clapack_sgetri(order, N, A, lda, ipiv);
#else #else
return LAPACKE_sgetri(order, N, A, lda, ipiv); return dynload::LAPACKE_sgetri(order, N, A, lda, ipiv);
#endif #endif
#else #else
LOG(FATAL) << "Not implemented"; LOG(FATAL) << "Not implemented";
...@@ -142,9 +185,9 @@ int getri<double>(const CBLAS_ORDER order, ...@@ -142,9 +185,9 @@ int getri<double>(const CBLAS_ORDER order,
const int* ipiv) { const int* ipiv) {
#ifdef PADDLE_USE_LAPACK #ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS #ifdef PADDLE_USE_ATLAS
return clapack_dgetri(order, N, A, lda, ipiv); return dynload::clapack_dgetri(order, N, A, lda, ipiv);
#else #else
return LAPACKE_dgetri(order, N, A, lda, ipiv); return dynload::LAPACKE_dgetri(order, N, A, lda, ipiv);
#endif #endif
#else #else
LOG(FATAL) << "Not implemented"; LOG(FATAL) << "Not implemented";
......
...@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "hl_dso_loader.h" #include "DynamicLoad.h"
#include "Logging.h"
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include "paddle/utils/Logging.h"
DEFINE_string(cudnn_dir, DEFINE_string(cudnn_dir,
"", "",
...@@ -30,6 +30,8 @@ DEFINE_string(cuda_dir, ...@@ -30,6 +30,8 @@ DEFINE_string(cuda_dir,
DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so."); DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so.");
DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so.");
static inline std::string join(const std::string& part1, static inline std::string join(const std::string& part1,
const std::string& part2) { const std::string& part2) {
// directory separator // directory separator
...@@ -160,3 +162,11 @@ void GetWarpCTCDsoHandle(void** dso_handle) { ...@@ -160,3 +162,11 @@ void GetWarpCTCDsoHandle(void** dso_handle) {
GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so", dso_handle); GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so", dso_handle);
#endif #endif
} }
void GetLapackDsoHandle(void** dso_handle) {
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "liblapack.dylib", dso_handle);
#else
GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "liblapack.so", dso_handle);
#endif
}
...@@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_DSO_LOADER_H_ #ifndef HL_DYNAMIC_LOAD_H_
#define HL_DSO_LOADER_H_ #define HL_DYNAMIC_LOAD_H_
#include <dlfcn.h> #include <dlfcn.h>
#include <memory> #include <memory>
#include <string> #include <string>
#include "hl_base.h"
/** /**
* @brief load the DSO of CUBLAS * @brief load the DSO of CUBLAS
...@@ -52,4 +51,12 @@ void GetCurandDsoHandle(void** dso_handle); ...@@ -52,4 +51,12 @@ void GetCurandDsoHandle(void** dso_handle);
*/ */
void GetWarpCTCDsoHandle(void** dso_handle); void GetWarpCTCDsoHandle(void** dso_handle);
#endif // HL_DSO_LOADER_H_ /**
* @brief load the DSO of lapack
*
* @param **dso_handle dso handler
*
*/
void GetLapackDsoHandle(void** dso_handle);
#endif // HL_DYNAMIC_LOAD_H_
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册