提交 5b8fe87f 编写于 作者: L liaogang

dlopen lapacke api and remove gfotran

上级 896b9c55
......@@ -27,33 +27,6 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
ENDIF(WIN32)
IF(CMAKE_COMPILER_IS_GNUCC)
ENABLE_LANGUAGE(Fortran)
if (NOT CMAKE_Fortran_COMPILER_VERSION)
# cmake < 3.4 cannot get CMAKE_Fortran_COMPILER_VERSION directly.
execute_process(COMMAND ${CMAKE_Fortran_COMPILER} -dumpversion
OUTPUT_VARIABLE CMAKE_Fortran_COMPILER_VERSION)
endif()
string(REGEX MATCHALL "[0-9]+" Fortran_VERSION ${CMAKE_Fortran_COMPILER_VERSION})
list(GET Fortran_VERSION 0 Fortran_MAJOR)
list(GET Fortran_VERSION 1 Fortran_MINOR)
find_library(GFORTRAN_LIBRARY NAMES gfortran PATHS
/lib
/usr/lib
/usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}.${Fortran_MINOR}/
/usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}/)
if (NOT GFORTRAN_LIBRARY)
message(FATAL_ERROR "Cannot found gfortran library which it is used by openblas")
endif()
find_package(Threads REQUIRED)
LIST(APPEND CBLAS_LIBRARIES ${GFORTRAN_LIBRARY} ${CMAKE_THREAD_LIBS_INIT})
ENDIF(CMAKE_COMPILER_IS_GNUCC)
IF(NOT CMAKE_Fortran_COMPILER)
MESSAGE(FATAL_ERROR "To build lapack in libopenblas, "
"you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
ENDIF(NOT CMAKE_Fortran_COMPILER)
ADD_DEFINITIONS(-DPADDLE_USE_LAPACK)
ExternalProject_Add(
......@@ -64,7 +37,7 @@ IF(NOT ${CBLAS_FOUND})
PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_LAPACK=1 DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib
INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
......
......@@ -21,16 +21,13 @@ set(CUDA_CXX_WITH_GPU_SOURCES
if(WITH_GPU)
set(CUDA_CXX_SOURCES
src/hl_dso_loader.cc
src/hl_warpctc_wrap.cc
${CUDA_CXX_WITH_GPU_SOURCES})
set_source_files_properties(${CUDA_CXX_SOURCES}
PROPERTIES COMPILE_FLAGS "-D__NVCC__")
else()
set(CUDA_CXX_SOURCES
src/hl_dso_loader.cc
src/hl_warpctc_wrap.cc)
set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc)
endif()
set(CUDA_CU_SOURCES
......@@ -47,7 +44,6 @@ set(CUDA_CU_SOURCES
set(CUDA_HEADERS
include/hl_time.h
include/hl_dso_loader.h
include/hl_warpctc_wrap.h
include/hl_sequence.h
include/hl_cuda_cublas.h
......
......@@ -16,8 +16,8 @@ limitations under the License. */
#include <sys/time.h>
#include <mutex>
#include "hl_cuda.h"
#include "hl_dso_loader.h"
#include "hl_thread.ph"
#include "paddle/utils/DynamicLoad.h"
#include "paddle/utils/Logging.h"
namespace dynload {
......
......@@ -17,8 +17,8 @@ limitations under the License. */
#include <gflags/gflags.h>
#include <mutex>
#include "hl_cuda_cudnn.ph"
#include "hl_dso_loader.h"
#include "hl_thread.ph"
#include "paddle/utils/DynamicLoad.h"
#include "paddle/utils/Logging.h"
DEFINE_int32(cudnn_conv_workspace_limit_in_mb,
......
......@@ -24,8 +24,8 @@ limitations under the License. */
#include <mutex>
#include "hl_cuda.ph"
#include "hl_thread.ph"
#include "hl_dso_loader.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/DynamicLoad.h"
// clang-format on
namespace dynload {
......@@ -98,11 +98,11 @@ int g_cuda_lib_version = 0;
* Check build-in cuda function using glog and it **does not**
* support << operator for more details error info.
*/
#define CHECK_CUDA(cudaFunc) \
do { \
cudaError_t cudaStat = cudaFunc; \
CHECK_EQ(cudaSuccess, cudaStat) << "Cuda Error: " \
<< cudaGetErrorString(cudaStat); \
#define CHECK_CUDA(cudaFunc) \
do { \
cudaError_t cudaStat = cudaFunc; \
CHECK_EQ(cudaSuccess, cudaStat) \
<< "Cuda Error: " << cudaGetErrorString(cudaStat); \
} while (0)
/**
......@@ -469,8 +469,8 @@ void hl_specify_devices_start(int *device, int number) {
CHECK(tmp) << "[Start failed] System memory is not enough.";
g_device = (hl_device_prop *)tmp;
device_prop = (hl_device_prop)(
(char *)tmp + g_system_device_num * sizeof(hl_device_prop *));
device_prop = (hl_device_prop)((char *)tmp + g_system_device_num *
sizeof(hl_device_prop *));
memset(g_device, 0, g_system_device_num * sizeof(hl_device_prop *));
int num = 0;
for (int i = 0; i < number; i++) {
......@@ -559,8 +559,8 @@ bool hl_get_sync_flag() { return g_sync_flag; }
void hl_stream_synchronize(hl_stream_t stream) {
cudaStream_t cu_stream;
CHECK_LT(stream, HPPL_STREAM_END) << __func__
<< ": the parameter stream is error.";
CHECK_LT(stream, HPPL_STREAM_END)
<< __func__ << ": the parameter stream is error.";
cu_stream = t_resource.stream[stream];
CHECK_CUDA(cudaStreamSynchronize(cu_stream));
......@@ -590,8 +590,8 @@ void hl_stream_record_event(hl_stream_t stream, hl_event_t event) {
cudaStream_t cu_stream;
CHECK_NOTNULL(event);
CHECK_LT(stream, HPPL_STREAM_END) << __func__
<< ": the parameter stream is error.";
CHECK_LT(stream, HPPL_STREAM_END)
<< __func__ << ": the parameter stream is error.";
cu_stream = t_resource.stream[stream];
CHECK_CUDA(cudaEventRecord(event->cu_event, cu_stream));
......@@ -601,8 +601,8 @@ void hl_stream_wait_event(hl_stream_t stream, hl_event_t event) {
cudaStream_t cu_stream;
CHECK_NOTNULL(event);
CHECK_LT(stream, HPPL_STREAM_END) << __func__
<< ": the parameter stream is error.";
CHECK_LT(stream, HPPL_STREAM_END)
<< __func__ << ": the parameter stream is error.";
cu_stream = t_resource.stream[stream];
CHECK_CUDA(cudaStreamWaitEvent(cu_stream, event->cu_event, 0));
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "hl_warpctc_wrap.h"
#include <mutex>
#include "hl_dso_loader.h"
#include "paddle/utils/DynamicLoad.h"
#include "paddle/utils/Logging.h"
namespace dynload {
......
......@@ -15,6 +15,49 @@ limitations under the License. */
#include "MathFunctions.h"
#include "hl_matrix_apply.cuh"
#include "hl_matrix_ops.cuh"
#include "paddle/utils/DynamicLoad.h"
namespace dynload {
std::once_flag lapack_dso_flag;
void* lapack_dso_handle = nullptr;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load lapack routine
* via operator overloading.
*
* note: default dynamic linked libs
*/
#define DYNAMIC_LOAD_LAPACK_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
int operator()(Args... args)->decltype(__name(args...)) { \
using lapack_func = decltype(__name(args...)) (*)(Args...); \
std::call_once(lapack_dso_flag, GetLapackDsoHandle, &lapack_dso_handle); \
void* p_##__name = dlsym(lapack_dso_handle, #__name); \
return reinterpret_cast<lapack_func>(p_##__name)(args...); \
} \
} __name; // struct DynLoad__##__name
// clang-format off
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS
#define LAPACK_ROUTINE_EACH(__macro) \
__macro(clapack_sgetrf) \
__macro(clapack_dgetrf) \
__macro(clapack_sgetri) \
__macro(clapack_dgetri)
#else
#define LAPACK_ROUTINE_EACH(__macro) \
__macro(LAPACKE_sgetrf) \
__macro(LAPACKE_dgetrf) \
__macro(LAPACKE_sgetri) \
__macro(LAPACKE_dgetri)
#endif
#endif
// clang-format on
} // namespace dynload
namespace paddle {
......@@ -87,9 +130,9 @@ int getrf<float>(const CBLAS_ORDER order,
int* ipiv) {
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS
return clapack_sgetrf(order, M, N, A, lda, ipiv);
return dynload::clapack_sgetrf(order, M, N, A, lda, ipiv);
#else
return LAPACKE_sgetrf(order, M, N, A, lda, ipiv);
return dynload::LAPACKE_sgetrf(order, M, N, A, lda, ipiv);
#endif
#else
LOG(FATAL) << "Not implemented";
......@@ -106,9 +149,9 @@ int getrf<double>(const CBLAS_ORDER order,
int* ipiv) {
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS
return clapack_dgetrf(order, M, N, A, lda, ipiv);
return dynload::clapack_dgetrf(order, M, N, A, lda, ipiv);
#else
return LAPACKE_dgetrf(order, M, N, A, lda, ipiv);
return dynload::LAPACKE_dgetrf(order, M, N, A, lda, ipiv);
#endif
#else
LOG(FATAL) << "Not implemented";
......@@ -124,9 +167,9 @@ int getri<float>(const CBLAS_ORDER order,
const int* ipiv) {
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS
return clapack_sgetri(order, N, A, lda, ipiv);
return dynload::clapack_sgetri(order, N, A, lda, ipiv);
#else
return LAPACKE_sgetri(order, N, A, lda, ipiv);
return dynload::LAPACKE_sgetri(order, N, A, lda, ipiv);
#endif
#else
LOG(FATAL) << "Not implemented";
......@@ -142,9 +185,9 @@ int getri<double>(const CBLAS_ORDER order,
const int* ipiv) {
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS
return clapack_dgetri(order, N, A, lda, ipiv);
return dynload::clapack_dgetri(order, N, A, lda, ipiv);
#else
return LAPACKE_dgetri(order, N, A, lda, ipiv);
return dynload::LAPACKE_dgetri(order, N, A, lda, ipiv);
#endif
#else
LOG(FATAL) << "Not implemented";
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_dso_loader.h"
#include "DynamicLoad.h"
#include "Logging.h"
#include <gflags/gflags.h>
#include "paddle/utils/Logging.h"
DEFINE_string(cudnn_dir,
"",
......@@ -30,6 +30,8 @@ DEFINE_string(cuda_dir,
DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so.");
DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so.");
static inline std::string join(const std::string& part1,
const std::string& part2) {
// directory separator
......@@ -160,3 +162,11 @@ void GetWarpCTCDsoHandle(void** dso_handle) {
GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so", dso_handle);
#endif
}
void GetLapackDsoHandle(void** dso_handle) {
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "liblapack.dylib", dso_handle);
#else
GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "liblapack.so", dso_handle);
#endif
}
......@@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_DSO_LOADER_H_
#define HL_DSO_LOADER_H_
#ifndef HL_DYNAMIC_LOAD_H_
#define HL_DYNAMIC_LOAD_H_
#include <dlfcn.h>
#include <memory>
#include <string>
#include "hl_base.h"
/**
* @brief load the DSO of CUBLAS
......@@ -52,4 +51,12 @@ void GetCurandDsoHandle(void** dso_handle);
*/
void GetWarpCTCDsoHandle(void** dso_handle);
#endif // HL_DSO_LOADER_H_
/**
* @brief load the DSO of lapack
*
* @param **dso_handle dso handler
*
*/
void GetLapackDsoHandle(void** dso_handle);
#endif // HL_DYNAMIC_LOAD_H_
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册