提交 d10f6cfb 编写于 作者: G gangliao 提交者: GitHub

Merge pull request #1958 from gangliao/gfortran

remove gfortran and dlopen lapack libs
...@@ -44,7 +44,6 @@ if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) ...@@ -44,7 +44,6 @@ if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON) set(CBLAS_FOUND ON)
if(${MKL_LAPACK_INC_DIR}) if(${MKL_LAPACK_INC_DIR})
add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})") message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})")
endif() endif()
return() # return file. return() # return file.
...@@ -80,7 +79,6 @@ if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND) ...@@ -80,7 +79,6 @@ if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND)
message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON) set(CBLAS_FOUND ON)
if(ATLAS_CLAPACK_INC_DIR) if(ATLAS_CLAPACK_INC_DIR)
add_definitions(-DPADDLE_USE_LAPACK)
set(CBLAS_INC_DIR ${CBLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR}) set(CBLAS_INC_DIR ${CBLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR})
message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})") message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})")
endif() endif()
...@@ -115,7 +113,6 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB) ...@@ -115,7 +113,6 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON) set(CBLAS_FOUND ON)
if(OPENBLAS_LAPACKE_INC_DIR) if(OPENBLAS_LAPACKE_INC_DIR)
add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})") message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
endif() endif()
return() return()
......
...@@ -27,35 +27,6 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -27,35 +27,6 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE) SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
ENDIF(WIN32) ENDIF(WIN32)
# When building with GCC, locate the gfortran runtime library and append it
# (together with the thread library) to CBLAS_LIBRARIES, because the bundled
# OpenBLAS build uses gfortran.
#
# Sets: Fortran_MAJOR, Fortran_MINOR, GFORTRAN_LIBRARY; appends to
# CBLAS_LIBRARIES. Fails the configure step if libgfortran cannot be found.
if(CMAKE_COMPILER_IS_GNUCC)
  enable_language(Fortran)
  if(NOT CMAKE_Fortran_COMPILER_VERSION)
    # cmake < 3.4 cannot get CMAKE_Fortran_COMPILER_VERSION directly.
    # Strip the trailing newline so the value is a clean version string.
    execute_process(COMMAND ${CMAKE_Fortran_COMPILER} -dumpversion
                    OUTPUT_VARIABLE CMAKE_Fortran_COMPILER_VERSION
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
  endif()

  # Quote the expansion: an empty version must not drop the input argument.
  string(REGEX MATCHALL "[0-9]+" Fortran_VERSION
         "${CMAKE_Fortran_COMPILER_VERSION}")
  list(LENGTH Fortran_VERSION Fortran_VERSION_COMPONENTS)
  if(Fortran_VERSION_COMPONENTS LESS 1)
    message(FATAL_ERROR
      "Cannot determine the gfortran version from '${CMAKE_Fortran_COMPILER_VERSION}'")
  endif()

  # `-dumpversion` may report only the major version (e.g. "7"), so the minor
  # component is optional; asking list(GET) for a missing index is an error.
  list(GET Fortran_VERSION 0 Fortran_MAJOR)
  set(GFORTRAN_SEARCH_PATHS /lib /usr/lib)
  if(Fortran_VERSION_COMPONENTS GREATER 1)
    list(GET Fortran_VERSION 1 Fortran_MINOR)
    list(APPEND GFORTRAN_SEARCH_PATHS
         /usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}.${Fortran_MINOR}/)
  else()
    # Minor version unknown; keep the variable defined for later readers.
    set(Fortran_MINOR 0)
  endif()
  list(APPEND GFORTRAN_SEARCH_PATHS
       /usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}/)

  find_library(GFORTRAN_LIBRARY NAMES gfortran PATHS ${GFORTRAN_SEARCH_PATHS})
  if(NOT GFORTRAN_LIBRARY)
    message(FATAL_ERROR "Cannot found gfortran library which it is used by openblas")
  endif()

  find_package(Threads REQUIRED)
  list(APPEND CBLAS_LIBRARIES ${GFORTRAN_LIBRARY} ${CMAKE_THREAD_LIBS_INIT})
endif()
# The OpenBLAS build below is handed FC=${CMAKE_Fortran_COMPILER}, so abort the
# configure step early when no Fortran compiler has been selected.
if(NOT CMAKE_Fortran_COMPILER)
  message(FATAL_ERROR
    "To build lapack in libopenblas, you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
endif()

# Expose PADDLE_USE_LAPACK to the C++ sources, which test it with #ifdef.
add_definitions(-DPADDLE_USE_LAPACK)
ExternalProject_Add( ExternalProject_Add(
openblas openblas
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
...@@ -64,7 +35,7 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -64,7 +35,7 @@ IF(NOT ${CBLAS_FOUND})
PREFIX ${CBLAS_SOURCES_DIR} PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR} INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1 BUILD_IN_SOURCE 1
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_LAPACK=1 DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib
INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR> INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
......
...@@ -21,16 +21,13 @@ set(CUDA_CXX_WITH_GPU_SOURCES ...@@ -21,16 +21,13 @@ set(CUDA_CXX_WITH_GPU_SOURCES
if(WITH_GPU) if(WITH_GPU)
set(CUDA_CXX_SOURCES set(CUDA_CXX_SOURCES
src/hl_dso_loader.cc
src/hl_warpctc_wrap.cc src/hl_warpctc_wrap.cc
${CUDA_CXX_WITH_GPU_SOURCES}) ${CUDA_CXX_WITH_GPU_SOURCES})
set_source_files_properties(${CUDA_CXX_SOURCES} set_source_files_properties(${CUDA_CXX_SOURCES}
PROPERTIES COMPILE_FLAGS "-D__NVCC__") PROPERTIES COMPILE_FLAGS "-D__NVCC__")
else() else()
set(CUDA_CXX_SOURCES set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc)
src/hl_dso_loader.cc
src/hl_warpctc_wrap.cc)
endif() endif()
set(CUDA_CU_SOURCES set(CUDA_CU_SOURCES
...@@ -47,7 +44,6 @@ set(CUDA_CU_SOURCES ...@@ -47,7 +44,6 @@ set(CUDA_CU_SOURCES
set(CUDA_HEADERS set(CUDA_HEADERS
include/hl_time.h include/hl_time.h
include/hl_dso_loader.h
include/hl_warpctc_wrap.h include/hl_warpctc_wrap.h
include/hl_sequence.h include/hl_sequence.h
include/hl_cuda_cublas.h include/hl_cuda_cublas.h
......
...@@ -40,18 +40,18 @@ public: ...@@ -40,18 +40,18 @@ public:
namespace gpu { namespace gpu {
static __device__ Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION; static __device__ Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
static __device__ Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION; static __device__ Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
} } // namespace gpu
#else #else
namespace cpu { namespace cpu {
static Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION; static Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
static Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION; static Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
} } // namespace cpu
#ifdef __AVX__ #ifdef __AVX__
namespace avx { namespace avx {
static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION; static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION;
static Active<__m256>::backward backward[] = HPPL_ACTIVE_FUNCTION; static Active<__m256>::backward backward[] = HPPL_ACTIVE_FUNCTION;
} } // namespace avx
#endif #endif
#endif #endif
......
...@@ -273,23 +273,23 @@ extern void hl_bilinear_forward(const real* inData, ...@@ -273,23 +273,23 @@ extern void hl_bilinear_forward(const real* inData,
const real ratioW); const real ratioW);
/** /**
* @brief Bilinear interpolation backward. * @brief Bilinear interpolation backward.
* *
* @param[out] inGrad input gradient. * @param[out] inGrad input gradient.
* @param[in] inImgH input image height. * @param[in] inImgH input image height.
* @param[in] inImgW input image width. * @param[in] inImgW input image width.
* @param[in] inputH input batchSize. * @param[in] inputH input batchSize.
* @param[in] inputW input image data dim. * @param[in] inputW input image data dim.
* @param[in] outGrad output gradient. * @param[in] outGrad output gradient.
* @param[in] outImgH output image height. * @param[in] outImgH output image height.
* @param[in] outImgW output image width. * @param[in] outImgW output image width.
* @param[in] outputH output batchSize. * @param[in] outputH output batchSize.
* @param[in] outputW output image data dim. * @param[in] outputW output image data dim.
* @param[in] numChannels number of channels. * @param[in] numChannels number of channels.
* @param[in] ratioH inImgH / outImgH. * @param[in] ratioH inImgH / outImgH.
* @param[in] ratioW inImgW / outImgW. * @param[in] ratioW inImgW / outImgW.
* *
*/ */
extern void hl_bilinear_backward(real* inGrad, extern void hl_bilinear_backward(real* inGrad,
const size_t inImgH, const size_t inImgH,
const size_t inImgW, const size_t inImgW,
......
...@@ -14,10 +14,9 @@ limitations under the License. */ ...@@ -14,10 +14,9 @@ limitations under the License. */
#include "hl_cuda_cublas.h" #include "hl_cuda_cublas.h"
#include <sys/time.h> #include <sys/time.h>
#include <mutex>
#include "hl_cuda.h" #include "hl_cuda.h"
#include "hl_dso_loader.h"
#include "hl_thread.ph" #include "hl_thread.ph"
#include "paddle/utils/DynamicLoader.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
namespace dynload { namespace dynload {
......
...@@ -15,10 +15,9 @@ limitations under the License. */ ...@@ -15,10 +15,9 @@ limitations under the License. */
#include "hl_cuda_cudnn.h" #include "hl_cuda_cudnn.h"
#include <cudnn.h> #include <cudnn.h>
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <mutex>
#include "hl_cuda_cudnn.ph" #include "hl_cuda_cudnn.ph"
#include "hl_dso_loader.h"
#include "hl_thread.ph" #include "hl_thread.ph"
#include "paddle/utils/DynamicLoader.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
DEFINE_int32(cudnn_conv_workspace_limit_in_mb, DEFINE_int32(cudnn_conv_workspace_limit_in_mb,
......
...@@ -21,11 +21,10 @@ limitations under the License. */ ...@@ -21,11 +21,10 @@ limitations under the License. */
#include <sys/syscall.h> #include <sys/syscall.h>
#include <sys/time.h> #include <sys/time.h>
#include <unistd.h> #include <unistd.h>
#include <mutex>
#include "hl_cuda.ph" #include "hl_cuda.ph"
#include "hl_thread.ph" #include "hl_thread.ph"
#include "hl_dso_loader.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "paddle/utils/DynamicLoader.h"
// clang-format on // clang-format on
namespace dynload { namespace dynload {
......
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#include "hl_warpctc_wrap.h" #include "hl_warpctc_wrap.h"
#include <mutex> #include <mutex>
#include "hl_dso_loader.h" #include "paddle/utils/DynamicLoader.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
namespace dynload { namespace dynload {
......
...@@ -74,9 +74,9 @@ TEST(MulOp, DDDMatrixMul) { ...@@ -74,9 +74,9 @@ TEST(MulOp, DDDMatrixMul) {
} }
/** /**
* C += A * B, B, C dense, A sparse * C += A * B, B, C dense, A sparse
* dense = sparse * dense * dense = sparse * dense
*/ */
void testFuncDSparseDMatrix( void testFuncDSparseDMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real scaleT = 1.0; real scaleT = 1.0;
...@@ -119,9 +119,9 @@ TEST(MuLOp, DSparseDMul) { ...@@ -119,9 +119,9 @@ TEST(MuLOp, DSparseDMul) {
} }
/** /**
* C += A * B, A, C dense, B sparse * C += A * B, A, C dense, B sparse
* dense = dense * sparse * dense = dense * sparse
*/ */
void testFuncDDSparseMatrix( void testFuncDDSparseMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real scaleT = 1.0; real scaleT = 1.0;
...@@ -165,9 +165,9 @@ TEST(MulOp, DDSparseMul) { ...@@ -165,9 +165,9 @@ TEST(MulOp, DDSparseMul) {
} }
/** /**
* C += A * B, A sparse, B, C dense * C += A * B, A sparse, B, C dense
* sparse = dense * dense * sparse = dense * dense
*/ */
void testFuncSparseDDMatrix( void testFuncSparseDDMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real scaleT = 1.0; real scaleT = 1.0;
......
...@@ -21,7 +21,6 @@ limitations under the License. */ ...@@ -21,7 +21,6 @@ limitations under the License. */
#include "MultiGradientMachine.h" #include "MultiGradientMachine.h"
#include "MultiNetwork.h" #include "MultiNetwork.h"
#include "NeuralNetwork.h" #include "NeuralNetwork.h"
#include "NeuralNetwork.h"
#include "ParallelNeuralNetwork.h" #include "ParallelNeuralNetwork.h"
#include "hl_gpu.h" #include "hl_gpu.h"
......
...@@ -637,7 +637,7 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() { ...@@ -637,7 +637,7 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
/* create scattered id information for all realLayer of inFrameLines one time. /* create scattered id information for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions information * If hasSubseq, will also create scattered sequenceStartPositions information
* for all realLayer of inFrameLines one time. * for all realLayer of inFrameLines one time.
*/ */
void RecurrentGradientMachine::createInFrameInfo(int inlinkId, void RecurrentGradientMachine::createInFrameInfo(int inlinkId,
const Argument& input, const Argument& input,
......
...@@ -107,18 +107,18 @@ public: ...@@ -107,18 +107,18 @@ public:
DropCallback; DropCallback;
/** /**
* @brief NormOrDropNodeCallback * @brief NormOrDropNodeCallback
* *
* Normalize a path's probabilities or just drop it by modifying path.logProb * Normalize a path's probabilities or just drop it by modifying path.logProb
* *
* The first parameter is sequence index in a batch * The first parameter is sequence index in a batch
* *
* The second parameter is path.ids * The second parameter is path.ids
* *
* The third parameter is probabilities for each node in this path. * The third parameter is probabilities for each node in this path.
* *
* The fourth parameter is the probability of the whole path. * The fourth parameter is the probability of the whole path.
*/ */
typedef std::function<void( typedef std::function<void(
int seqId, const std::vector<int>&, std::vector<real>&, real*)> int seqId, const std::vector<int>&, std::vector<real>&, real*)>
NormOrDropNodeCallback; NormOrDropNodeCallback;
...@@ -348,9 +348,9 @@ protected: ...@@ -348,9 +348,9 @@ protected:
int targetInfoInlinkId_; int targetInfoInlinkId_;
/* create scattered id information for all realLayer of inFrameLines one time. /* create scattered id information for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions information * If hasSubseq, will also create scattered sequenceStartPositions information
* for all realLayer of inFrameLines one time. * for all realLayer of inFrameLines one time.
*/ */
void createInFrameInfo(int inlinks_id, void createInFrameInfo(int inlinks_id,
const Argument& input, const Argument& input,
PassType passType); PassType passType);
......
...@@ -106,9 +106,9 @@ protected: ...@@ -106,9 +106,9 @@ protected:
public: public:
/** /**
* Wait until all input value ready. * Wait until all input value ready.
* Called before Layer::forward() function. * Called before Layer::forward() function.
*/ */
virtual void waitInputValue(); virtual void waitInputValue();
/** /**
...@@ -118,9 +118,9 @@ public: ...@@ -118,9 +118,9 @@ public:
virtual void copyOutputToOtherDevice(); virtual void copyOutputToOtherDevice();
/** /**
* Wait until all output grad ready and merge them to output_.grad. * Wait until all output grad ready and merge them to output_.grad.
* Called before Layer::backward() function. * Called before Layer::backward() function.
*/ */
virtual void waitAndMergeOutputGrad(); virtual void waitAndMergeOutputGrad();
/** /**
......
...@@ -29,7 +29,7 @@ namespace paddle { ...@@ -29,7 +29,7 @@ namespace paddle {
* *
* The config file api is rotate_layer * The config file api is rotate_layer
* *
*/ */
class RotateLayer : public Layer { class RotateLayer : public Layer {
public: public:
......
...@@ -60,7 +60,7 @@ void SequencePoolLayer::forward(PassType passType) { ...@@ -60,7 +60,7 @@ void SequencePoolLayer::forward(PassType passType) {
* thus, in this case, output_ has no sequenceStartPositions. * thus, in this case, output_ has no sequenceStartPositions.
* If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
* case, we should compute the new sequenceStartPositions. * case, we should compute the new sequenceStartPositions.
*/ */
if (type_) { if (type_) {
CHECK(input.subSequenceStartPositions) CHECK(input.subSequenceStartPositions)
<< "when trans_type = seq, input must hasSubseq"; << "when trans_type = seq, input must hasSubseq";
......
...@@ -15,6 +15,54 @@ limitations under the License. */ ...@@ -15,6 +15,54 @@ limitations under the License. */
#include "MathFunctions.h" #include "MathFunctions.h"
#include "hl_matrix_apply.cuh" #include "hl_matrix_apply.cuh"
#include "hl_matrix_ops.cuh" #include "hl_matrix_ops.cuh"
#include "paddle/utils/DynamicLoader.h"
namespace dynload {
std::once_flag lapack_dso_flag;
void* lapack_dso_handle = nullptr;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load lapack routine
* via operator overloading.
*
* note: default dynamic linked libs
*/
#define DYNAMIC_LOAD_LAPACK_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> decltype(__name(args...)) { \
using lapack_func = decltype(__name(args...)) (*)(Args...); \
std::call_once(lapack_dso_flag, GetLapackDsoHandle, &lapack_dso_handle); \
void* p_##__name = dlsym(lapack_dso_handle, #__name); \
return reinterpret_cast<lapack_func>(p_##__name)(args...); \
} \
} __name; // struct DynLoad__##__name
// clang-format off
#ifdef PADDLE_USE_ATLAS
#define PADDLE_SGETRF clapack_sgetrf
#define PADDLE_DGETRF clapack_dgetrf
#define PADDLE_SGETRI clapack_sgetri
#define PADDLE_DGETRI clapack_dgetri
#else
#define PADDLE_SGETRF LAPACKE_sgetrf
#define PADDLE_DGETRF LAPACKE_dgetrf
#define PADDLE_SGETRI LAPACKE_sgetri
#define PADDLE_DGETRI LAPACKE_dgetri
#endif
#define LAPACK_ROUTINE_EACH(__macro) \
__macro(PADDLE_SGETRF) \
__macro(PADDLE_DGETRF) \
__macro(PADDLE_SGETRI) \
__macro(PADDLE_DGETRI)
// clang-format on
LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP)
} // namespace dynload
namespace paddle { namespace paddle {
...@@ -85,16 +133,7 @@ int getrf<float>(const CBLAS_ORDER order, ...@@ -85,16 +133,7 @@ int getrf<float>(const CBLAS_ORDER order,
float* A, float* A,
const int lda, const int lda,
int* ipiv) { int* ipiv) {
#ifdef PADDLE_USE_LAPACK return dynload::PADDLE_SGETRF(order, M, N, A, lda, ipiv);
#ifdef PADDLE_USE_ATLAS
return clapack_sgetrf(order, M, N, A, lda, ipiv);
#else
return LAPACKE_sgetrf(order, M, N, A, lda, ipiv);
#endif
#else
LOG(FATAL) << "Not implemented";
#endif
return 0;
} }
template <> template <>
...@@ -104,16 +143,7 @@ int getrf<double>(const CBLAS_ORDER order, ...@@ -104,16 +143,7 @@ int getrf<double>(const CBLAS_ORDER order,
double* A, double* A,
const int lda, const int lda,
int* ipiv) { int* ipiv) {
#ifdef PADDLE_USE_LAPACK return dynload::PADDLE_DGETRF(order, M, N, A, lda, ipiv);
#ifdef PADDLE_USE_ATLAS
return clapack_dgetrf(order, M, N, A, lda, ipiv);
#else
return LAPACKE_dgetrf(order, M, N, A, lda, ipiv);
#endif
#else
LOG(FATAL) << "Not implemented";
#endif
return 0;
} }
template <> template <>
...@@ -122,16 +152,7 @@ int getri<float>(const CBLAS_ORDER order, ...@@ -122,16 +152,7 @@ int getri<float>(const CBLAS_ORDER order,
float* A, float* A,
const int lda, const int lda,
const int* ipiv) { const int* ipiv) {
#ifdef PADDLE_USE_LAPACK return dynload::PADDLE_SGETRI(order, N, A, lda, ipiv);
#ifdef PADDLE_USE_ATLAS
return clapack_sgetri(order, N, A, lda, ipiv);
#else
return LAPACKE_sgetri(order, N, A, lda, ipiv);
#endif
#else
LOG(FATAL) << "Not implemented";
#endif
return 0;
} }
template <> template <>
...@@ -140,15 +161,7 @@ int getri<double>(const CBLAS_ORDER order, ...@@ -140,15 +161,7 @@ int getri<double>(const CBLAS_ORDER order,
double* A, double* A,
const int lda, const int lda,
const int* ipiv) { const int* ipiv) {
#ifdef PADDLE_USE_LAPACK return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv);
#ifdef PADDLE_USE_ATLAS
return clapack_dgetri(order, N, A, lda, ipiv);
#else
return LAPACKE_dgetri(order, N, A, lda, ipiv);
#endif
#else
LOG(FATAL) << "Not implemented";
#endif
return 0; return 0;
} }
......
...@@ -17,14 +17,11 @@ limitations under the License. */ ...@@ -17,14 +17,11 @@ limitations under the License. */
#ifdef PADDLE_USE_MKL #ifdef PADDLE_USE_MKL
#include <mkl.h> #include <mkl.h>
#ifdef PADDLE_USE_LAPACK
#include <mkl_lapacke.h> #include <mkl_lapacke.h>
#endif
#else #else
extern "C" { extern "C" {
#include <cblas.h> #include <cblas.h>
} }
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS #ifdef PADDLE_USE_ATLAS
extern "C" { extern "C" {
#include <clapack.h> #include <clapack.h>
...@@ -33,7 +30,6 @@ extern "C" { ...@@ -33,7 +30,6 @@ extern "C" {
#include <lapacke.h> #include <lapacke.h>
#endif #endif
#endif #endif
#endif
#include <cmath> #include <cmath>
......
...@@ -37,7 +37,7 @@ limitations under the License. */ ...@@ -37,7 +37,7 @@ limitations under the License. */
* *
* AutoCompare test; * AutoCompare test;
* test.cmpWithoutArg<I...>(function, height, width) * test.cmpWithoutArg<I...>(function, height, width)
*/ */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "TensorCheck.h" #include "TensorCheck.h"
......
...@@ -21,6 +21,7 @@ limitations under the License. */ ...@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h" #include "paddle/math/SparseMatrix.h"
#include "paddle/testing/TestUtil.h" #include "paddle/testing/TestUtil.h"
#include "paddle/utils/DynamicLoader.h"
#include "paddle/utils/Stat.h" #include "paddle/utils/Stat.h"
#include "paddle/utils/Util.h" #include "paddle/utils/Util.h"
...@@ -235,10 +236,15 @@ TEST(Matrix, unary) { ...@@ -235,10 +236,15 @@ TEST(Matrix, unary) {
testMatrixTranspose(height, width); testMatrixTranspose(height, width);
testMatrixRotate(height, width); testMatrixRotate(height, width);
} }
// inverse // inverse matrix
#ifdef PADDLE_USE_LAPACK void** dso_handler = nullptr;
testMatrixInverse(height); GetLapackDsoHandle(dso_handler);
#endif if (nullptr == *dso_handler) {
LOG(WARNING) << "Failed to find liblapack.so, please specify its path "
"using LD_LIBRARY_PATH.";
} else {
testMatrixInverse(height);
}
} }
} }
......
...@@ -126,7 +126,7 @@ protected: ...@@ -126,7 +126,7 @@ protected:
/* /*
* AdaDelta Optimization. * AdaDelta Optimization.
* http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf * http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf
*/ */
class AdaDeltaParameterOptimizer : public ParameterOptimizer { class AdaDeltaParameterOptimizer : public ParameterOptimizer {
public: public:
explicit AdaDeltaParameterOptimizer(const OptimizationConfig& optConfig) explicit AdaDeltaParameterOptimizer(const OptimizationConfig& optConfig)
......
...@@ -1059,14 +1059,14 @@ inline bool operator==(const value& x, const value& y) { ...@@ -1059,14 +1059,14 @@ inline bool operator==(const value& x, const value& y) {
} }
inline bool operator!=(const value& x, const value& y) { return !(x == y); } inline bool operator!=(const value& x, const value& y) { return !(x == y); }
} } // namespace picojson
namespace std { namespace std {
template <> template <>
inline void swap(picojson::value& x, picojson::value& y) { inline void swap(picojson::value& x, picojson::value& y) {
x.swap(y); x.swap(y);
} }
} } // namespace std
inline std::istream& operator>>(std::istream& is, picojson::value& x) { inline std::istream& operator>>(std::istream& is, picojson::value& x) {
picojson::set_last_error(std::string()); picojson::set_last_error(std::string());
......
...@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "hl_dso_loader.h" #include "DynamicLoader.h"
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include "paddle/utils/Logging.h" #include "Logging.h"
DEFINE_string(cudnn_dir, DEFINE_string(cudnn_dir,
"", "",
...@@ -30,6 +30,8 @@ DEFINE_string(cuda_dir, ...@@ -30,6 +30,8 @@ DEFINE_string(cuda_dir,
DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so."); DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so.");
DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so.");
static inline std::string join(const std::string& part1, static inline std::string join(const std::string& part1,
const std::string& part2) { const std::string& part2) {
// directory separator // directory separator
...@@ -160,3 +162,11 @@ void GetWarpCTCDsoHandle(void** dso_handle) { ...@@ -160,3 +162,11 @@ void GetWarpCTCDsoHandle(void** dso_handle) {
GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so", dso_handle); GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so", dso_handle);
#endif #endif
} }
/**
 * Looks up the LAPACK shared library through GetDsoHandleFromSearchPath,
 * using the --lapack_dir flag as the search root, and stores the result in
 * *dso_handle. The library file name depends on the platform.
 */
void GetLapackDsoHandle(void** dso_handle) {
#if defined(__APPLE__) || defined(__OSX__)
  static const char kLapackLibName[] = "liblapack.dylib";
#else
  static const char kLapackLibName[] = "liblapack.so";
#endif
  GetDsoHandleFromSearchPath(FLAGS_lapack_dir, kLapackLibName, dso_handle);
}
...@@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_DSO_LOADER_H_ #ifndef DYNAMIC_LOAD_H_
#define HL_DSO_LOADER_H_ #define DYNAMIC_LOAD_H_
#include <dlfcn.h> #include <dlfcn.h>
#include <memory> #include <memory>
#include <mutex>
#include <string> #include <string>
#include "hl_base.h"
/** /**
* @brief load the DSO of CUBLAS * @brief load the DSO of CUBLAS
...@@ -52,4 +52,12 @@ void GetCurandDsoHandle(void** dso_handle); ...@@ -52,4 +52,12 @@ void GetCurandDsoHandle(void** dso_handle);
*/ */
void GetWarpCTCDsoHandle(void** dso_handle); void GetWarpCTCDsoHandle(void** dso_handle);
#endif // HL_DSO_LOADER_H_ /**
* @brief load the DSO of lapack
*
* @param **dso_handle dso handler
*
*/
void GetLapackDsoHandle(void** dso_handle);
#endif // DYNAMIC_LOAD_H_
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册