Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
5b8fe87f
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5b8fe87f
编写于
5月 02, 2017
作者:
L
liaogang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
dlopen lapacke api and remove gfotran
上级
896b9c55
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
93 addition
and
64 deletion
+93
-64
cmake/external/openblas.cmake
cmake/external/openblas.cmake
+1
-28
paddle/cuda/CMakeLists.txt
paddle/cuda/CMakeLists.txt
+1
-5
paddle/cuda/src/hl_cuda_cublas.cc
paddle/cuda/src/hl_cuda_cublas.cc
+1
-1
paddle/cuda/src/hl_cuda_cudnn.cc
paddle/cuda/src/hl_cuda_cudnn.cc
+1
-1
paddle/cuda/src/hl_cuda_device.cc
paddle/cuda/src/hl_cuda_device.cc
+14
-14
paddle/cuda/src/hl_warpctc_wrap.cc
paddle/cuda/src/hl_warpctc_wrap.cc
+1
-1
paddle/math/MathFunctions.cpp
paddle/math/MathFunctions.cpp
+51
-8
paddle/utils/DynamicLoad.cc
paddle/utils/DynamicLoad.cc
+12
-2
paddle/utils/DynamicLoad.h
paddle/utils/DynamicLoad.h
+11
-4
未找到文件。
cmake/external/openblas.cmake
浏览文件 @
5b8fe87f
...
...
@@ -27,33 +27,6 @@ IF(NOT ${CBLAS_FOUND})
SET
(
CBLAS_LIBRARIES
"
${
CBLAS_INSTALL_DIR
}
/lib/libopenblas.a"
CACHE FILEPATH
"openblas library"
FORCE
)
ENDIF
(
WIN32
)
IF
(
CMAKE_COMPILER_IS_GNUCC
)
ENABLE_LANGUAGE
(
Fortran
)
if
(
NOT CMAKE_Fortran_COMPILER_VERSION
)
# cmake < 3.4 cannot get CMAKE_Fortran_COMPILER_VERSION directly.
execute_process
(
COMMAND
${
CMAKE_Fortran_COMPILER
}
-dumpversion
OUTPUT_VARIABLE CMAKE_Fortran_COMPILER_VERSION
)
endif
()
string
(
REGEX MATCHALL
"[0-9]+"
Fortran_VERSION
${
CMAKE_Fortran_COMPILER_VERSION
}
)
list
(
GET Fortran_VERSION 0 Fortran_MAJOR
)
list
(
GET Fortran_VERSION 1 Fortran_MINOR
)
find_library
(
GFORTRAN_LIBRARY NAMES gfortran PATHS
/lib
/usr/lib
/usr/lib/gcc/x86_64-linux-gnu/
${
Fortran_MAJOR
}
.
${
Fortran_MINOR
}
/
/usr/lib/gcc/x86_64-linux-gnu/
${
Fortran_MAJOR
}
/
)
if
(
NOT GFORTRAN_LIBRARY
)
message
(
FATAL_ERROR
"Cannot found gfortran library which it is used by openblas"
)
endif
()
find_package
(
Threads REQUIRED
)
LIST
(
APPEND CBLAS_LIBRARIES
${
GFORTRAN_LIBRARY
}
${
CMAKE_THREAD_LIBS_INIT
}
)
ENDIF
(
CMAKE_COMPILER_IS_GNUCC
)
IF
(
NOT CMAKE_Fortran_COMPILER
)
MESSAGE
(
FATAL_ERROR
"To build lapack in libopenblas, "
"you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=..."
)
ENDIF
(
NOT CMAKE_Fortran_COMPILER
)
ADD_DEFINITIONS
(
-DPADDLE_USE_LAPACK
)
ExternalProject_Add
(
...
...
@@ -64,7 +37,7 @@ IF(NOT ${CBLAS_FOUND})
PREFIX
${
CBLAS_SOURCES_DIR
}
INSTALL_DIR
${
CBLAS_INSTALL_DIR
}
BUILD_IN_SOURCE 1
BUILD_COMMAND
${
CMAKE_MAKE_PROGRAM
}
FC=
${
CMAKE_Fortran_COMPILER
}
CC=
${
CMAKE_C_COMPILER
}
HOSTCC=
${
CMAKE_C_COMPILER
}
DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib
BUILD_COMMAND
${
CMAKE_MAKE_PROGRAM
}
FC=
${
CMAKE_Fortran_COMPILER
}
CC=
${
CMAKE_C_COMPILER
}
HOSTCC=
${
CMAKE_C_COMPILER
}
NO_LAPACK=1
DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib
INSTALL_COMMAND
${
CMAKE_MAKE_PROGRAM
}
install NO_SHARED=1 PREFIX=<INSTALL_DIR>
UPDATE_COMMAND
""
CONFIGURE_COMMAND
""
...
...
paddle/cuda/CMakeLists.txt
浏览文件 @
5b8fe87f
...
...
@@ -21,16 +21,13 @@ set(CUDA_CXX_WITH_GPU_SOURCES
if
(
WITH_GPU
)
set
(
CUDA_CXX_SOURCES
src/hl_dso_loader.cc
src/hl_warpctc_wrap.cc
${
CUDA_CXX_WITH_GPU_SOURCES
}
)
set_source_files_properties
(
${
CUDA_CXX_SOURCES
}
PROPERTIES COMPILE_FLAGS
"-D__NVCC__"
)
else
()
set
(
CUDA_CXX_SOURCES
src/hl_dso_loader.cc
src/hl_warpctc_wrap.cc
)
set
(
CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc
)
endif
()
set
(
CUDA_CU_SOURCES
...
...
@@ -47,7 +44,6 @@ set(CUDA_CU_SOURCES
set
(
CUDA_HEADERS
include/hl_time.h
include/hl_dso_loader.h
include/hl_warpctc_wrap.h
include/hl_sequence.h
include/hl_cuda_cublas.h
...
...
paddle/cuda/src/hl_cuda_cublas.cc
浏览文件 @
5b8fe87f
...
...
@@ -16,8 +16,8 @@ limitations under the License. */
#include <sys/time.h>
#include <mutex>
#include "hl_cuda.h"
#include "hl_dso_loader.h"
#include "hl_thread.ph"
#include "paddle/utils/DynamicLoad.h"
#include "paddle/utils/Logging.h"
namespace
dynload
{
...
...
paddle/cuda/src/hl_cuda_cudnn.cc
浏览文件 @
5b8fe87f
...
...
@@ -17,8 +17,8 @@ limitations under the License. */
#include <gflags/gflags.h>
#include <mutex>
#include "hl_cuda_cudnn.ph"
#include "hl_dso_loader.h"
#include "hl_thread.ph"
#include "paddle/utils/DynamicLoad.h"
#include "paddle/utils/Logging.h"
DEFINE_int32
(
cudnn_conv_workspace_limit_in_mb
,
...
...
paddle/cuda/src/hl_cuda_device.cc
浏览文件 @
5b8fe87f
...
...
@@ -24,8 +24,8 @@ limitations under the License. */
#include <mutex>
#include "hl_cuda.ph"
#include "hl_thread.ph"
#include "hl_dso_loader.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/DynamicLoad.h"
// clang-format on
namespace
dynload
{
...
...
@@ -98,11 +98,11 @@ int g_cuda_lib_version = 0;
* Check build-in cuda function using glog and it **does not**
* support << operator for more details error info.
*/
#define CHECK_CUDA(cudaFunc)
\
do {
\
cudaError_t cudaStat = cudaFunc;
\
CHECK_EQ(cudaSuccess, cudaStat)
<< "Cuda Error: "
\
<< cudaGetErrorString(cudaStat); \
#define CHECK_CUDA(cudaFunc) \
do { \
cudaError_t cudaStat = cudaFunc; \
CHECK_EQ(cudaSuccess, cudaStat)
\
<< "Cuda Error: "
<< cudaGetErrorString(cudaStat); \
} while (0)
/**
...
...
@@ -469,8 +469,8 @@ void hl_specify_devices_start(int *device, int number) {
CHECK
(
tmp
)
<<
"[Start failed] System memory is not enough."
;
g_device
=
(
hl_device_prop
*
)
tmp
;
device_prop
=
(
hl_device_prop
)(
(
char
*
)
tmp
+
g_system_device_num
*
sizeof
(
hl_device_prop
*
));
device_prop
=
(
hl_device_prop
)(
(
char
*
)
tmp
+
g_system_device_num
*
sizeof
(
hl_device_prop
*
));
memset
(
g_device
,
0
,
g_system_device_num
*
sizeof
(
hl_device_prop
*
));
int
num
=
0
;
for
(
int
i
=
0
;
i
<
number
;
i
++
)
{
...
...
@@ -559,8 +559,8 @@ bool hl_get_sync_flag() { return g_sync_flag; }
void
hl_stream_synchronize
(
hl_stream_t
stream
)
{
cudaStream_t
cu_stream
;
CHECK_LT
(
stream
,
HPPL_STREAM_END
)
<<
__func__
<<
": the parameter stream is error."
;
CHECK_LT
(
stream
,
HPPL_STREAM_END
)
<<
__func__
<<
": the parameter stream is error."
;
cu_stream
=
t_resource
.
stream
[
stream
];
CHECK_CUDA
(
cudaStreamSynchronize
(
cu_stream
));
...
...
@@ -590,8 +590,8 @@ void hl_stream_record_event(hl_stream_t stream, hl_event_t event) {
cudaStream_t
cu_stream
;
CHECK_NOTNULL
(
event
);
CHECK_LT
(
stream
,
HPPL_STREAM_END
)
<<
__func__
<<
": the parameter stream is error."
;
CHECK_LT
(
stream
,
HPPL_STREAM_END
)
<<
__func__
<<
": the parameter stream is error."
;
cu_stream
=
t_resource
.
stream
[
stream
];
CHECK_CUDA
(
cudaEventRecord
(
event
->
cu_event
,
cu_stream
));
...
...
@@ -601,8 +601,8 @@ void hl_stream_wait_event(hl_stream_t stream, hl_event_t event) {
cudaStream_t
cu_stream
;
CHECK_NOTNULL
(
event
);
CHECK_LT
(
stream
,
HPPL_STREAM_END
)
<<
__func__
<<
": the parameter stream is error."
;
CHECK_LT
(
stream
,
HPPL_STREAM_END
)
<<
__func__
<<
": the parameter stream is error."
;
cu_stream
=
t_resource
.
stream
[
stream
];
CHECK_CUDA
(
cudaStreamWaitEvent
(
cu_stream
,
event
->
cu_event
,
0
));
...
...
paddle/cuda/src/hl_warpctc_wrap.cc
浏览文件 @
5b8fe87f
...
...
@@ -14,7 +14,7 @@ limitations under the License. */
#include "hl_warpctc_wrap.h"
#include <mutex>
#include "
hl_dso_loader
.h"
#include "
paddle/utils/DynamicLoad
.h"
#include "paddle/utils/Logging.h"
namespace
dynload
{
...
...
paddle/math/MathFunctions.cpp
浏览文件 @
5b8fe87f
...
...
@@ -15,6 +15,49 @@ limitations under the License. */
#include "MathFunctions.h"
#include "hl_matrix_apply.cuh"
#include "hl_matrix_ops.cuh"
#include "paddle/utils/DynamicLoad.h"
namespace
dynload
{
std
::
once_flag
lapack_dso_flag
;
void
*
lapack_dso_handle
=
nullptr
;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load lapack routine
* via operator overloading.
*
* note: default dynamic linked libs
*/
#define DYNAMIC_LOAD_LAPACK_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
int operator()(Args... args)->decltype(__name(args...)) { \
using lapack_func = decltype(__name(args...)) (*)(Args...); \
std::call_once(lapack_dso_flag, GetLapackDsoHandle, &lapack_dso_handle); \
void* p_##__name = dlsym(lapack_dso_handle, #__name); \
return reinterpret_cast<lapack_func>(p_##__name)(args...); \
} \
} __name; // struct DynLoad__##__name
// clang-format off
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS
#define LAPACK_ROUTINE_EACH(__macro) \
__macro(clapack_sgetrf) \
__macro(clapack_dgetrf) \
__macro(clapack_sgetri) \
__macro(clapack_dgetri)
#else
#define LAPACK_ROUTINE_EACH(__macro) \
__macro(LAPACKE_sgetrf) \
__macro(LAPACKE_dgetrf) \
__macro(LAPACKE_sgetri) \
__macro(LAPACKE_dgetri)
#endif
#endif
// clang-format on
}
// namespace dynload
namespace
paddle
{
...
...
@@ -87,9 +130,9 @@ int getrf<float>(const CBLAS_ORDER order,
int
*
ipiv
)
{
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS
return
clapack_sgetrf
(
order
,
M
,
N
,
A
,
lda
,
ipiv
);
return
dynload
::
clapack_sgetrf
(
order
,
M
,
N
,
A
,
lda
,
ipiv
);
#else
return
LAPACKE_sgetrf
(
order
,
M
,
N
,
A
,
lda
,
ipiv
);
return
dynload
::
LAPACKE_sgetrf
(
order
,
M
,
N
,
A
,
lda
,
ipiv
);
#endif
#else
LOG
(
FATAL
)
<<
"Not implemented"
;
...
...
@@ -106,9 +149,9 @@ int getrf<double>(const CBLAS_ORDER order,
int
*
ipiv
)
{
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS
return
clapack_dgetrf
(
order
,
M
,
N
,
A
,
lda
,
ipiv
);
return
dynload
::
clapack_dgetrf
(
order
,
M
,
N
,
A
,
lda
,
ipiv
);
#else
return
LAPACKE_dgetrf
(
order
,
M
,
N
,
A
,
lda
,
ipiv
);
return
dynload
::
LAPACKE_dgetrf
(
order
,
M
,
N
,
A
,
lda
,
ipiv
);
#endif
#else
LOG
(
FATAL
)
<<
"Not implemented"
;
...
...
@@ -124,9 +167,9 @@ int getri<float>(const CBLAS_ORDER order,
const
int
*
ipiv
)
{
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS
return
clapack_sgetri
(
order
,
N
,
A
,
lda
,
ipiv
);
return
dynload
::
clapack_sgetri
(
order
,
N
,
A
,
lda
,
ipiv
);
#else
return
LAPACKE_sgetri
(
order
,
N
,
A
,
lda
,
ipiv
);
return
dynload
::
LAPACKE_sgetri
(
order
,
N
,
A
,
lda
,
ipiv
);
#endif
#else
LOG
(
FATAL
)
<<
"Not implemented"
;
...
...
@@ -142,9 +185,9 @@ int getri<double>(const CBLAS_ORDER order,
const
int
*
ipiv
)
{
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS
return
clapack_dgetri
(
order
,
N
,
A
,
lda
,
ipiv
);
return
dynload
::
clapack_dgetri
(
order
,
N
,
A
,
lda
,
ipiv
);
#else
return
LAPACKE_dgetri
(
order
,
N
,
A
,
lda
,
ipiv
);
return
dynload
::
LAPACKE_dgetri
(
order
,
N
,
A
,
lda
,
ipiv
);
#endif
#else
LOG
(
FATAL
)
<<
"Not implemented"
;
...
...
paddle/
cuda/src/hl_dso_loader
.cc
→
paddle/
utils/DynamicLoad
.cc
浏览文件 @
5b8fe87f
...
...
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_dso_loader.h"
#include "DynamicLoad.h"
#include "Logging.h"
#include <gflags/gflags.h>
#include "paddle/utils/Logging.h"
DEFINE_string
(
cudnn_dir
,
""
,
...
...
@@ -30,6 +30,8 @@ DEFINE_string(cuda_dir,
DEFINE_string
(
warpctc_dir
,
""
,
"Specify path for loading libwarpctc.so."
);
DEFINE_string
(
lapack_dir
,
""
,
"Specify path for loading liblapack.so."
);
static
inline
std
::
string
join
(
const
std
::
string
&
part1
,
const
std
::
string
&
part2
)
{
// directory separator
...
...
@@ -160,3 +162,11 @@ void GetWarpCTCDsoHandle(void** dso_handle) {
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.so"
,
dso_handle
);
#endif
}
void
GetLapackDsoHandle
(
void
**
dso_handle
)
{
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"liblapack.dylib"
,
dso_handle
);
#else
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"liblapack.so"
,
dso_handle
);
#endif
}
paddle/
cuda/include/hl_dso_loader
.h
→
paddle/
utils/DynamicLoad
.h
浏览文件 @
5b8fe87f
...
...
@@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_D
SO_LOADER
_H_
#define HL_D
SO_LOADER
_H_
#ifndef HL_D
YNAMIC_LOAD
_H_
#define HL_D
YNAMIC_LOAD
_H_
#include <dlfcn.h>
#include <memory>
#include <string>
#include "hl_base.h"
/**
* @brief load the DSO of CUBLAS
...
...
@@ -52,4 +51,12 @@ void GetCurandDsoHandle(void** dso_handle);
*/
void
GetWarpCTCDsoHandle
(
void
**
dso_handle
);
#endif // HL_DSO_LOADER_H_
/**
* @brief load the DSO of lapack
*
* @param **dso_handle dso handler
*
*/
void
GetLapackDsoHandle
(
void
**
dso_handle
);
#endif // HL_DYNAMIC_LOAD_H_
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录