Commit d10f6cfb in s920243400 / PaddleDetection (forked from PaddlePaddle / PaddleDetection)

Authored May 03, 2017 by gangliao
Committed by GitHub on May 03, 2017
Merge pull request #1958 from gangliao/gfortran
remove gfortran and dlopen lapack libs
Parents: 18e5edf6, 917b1929

Showing 24 changed files with 149 additions and 156 deletions (+149 -156).

This commit drops the build-time gfortran/LAPACK link dependency: OpenBLAS is now built with NO_LAPACK=1, the PADDLE_USE_LAPACK compile definition disappears, and the four LAPACK routines Paddle uses (sgetrf/dgetrf/sgetri/dgetri) are resolved at runtime from liblapack via dlopen/dlsym wrappers, with the DSO loader moved from paddle/cuda (hl_dso_loader) to the new paddle/utils/DynamicLoader module. Hunks whose two sides differ only in whitespace are rendered once below, without +/- markers.
cmake/cblas.cmake                                             +0   -3
cmake/external/openblas.cmake                                 +1   -30
paddle/cuda/CMakeLists.txt                                    +1   -5
paddle/cuda/include/hl_activation_functions.h                 +3   -3
paddle/cuda/include/hl_cnn.h                                  +17  -17
paddle/cuda/src/hl_cuda_cublas.cc                             +1   -2
paddle/cuda/src/hl_cuda_cudnn.cc                              +1   -2
paddle/cuda/src/hl_cuda_device.cc                             +1   -2
paddle/cuda/src/hl_warpctc_wrap.cc                            +1   -1
paddle/function/MulOpTest.cpp                                 +9   -9
paddle/gserver/gradientmachines/GradientMachine.cpp           +0   -1
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp  +1   -1
paddle/gserver/gradientmachines/RecurrentGradientMachine.h    +15  -15
paddle/gserver/layers/Layer.h                                 +6   -6
paddle/gserver/layers/RotateLayer.h                           +1   -1
paddle/gserver/layers/SequencePoolLayer.cpp                   +1   -1
paddle/math/MathFunctions.cpp                                 +52  -39
paddle/math/MathFunctions.h                                   +0   -4
paddle/math/tests/TestUtils.h                                 +1   -1
paddle/math/tests/test_matrixCompare.cpp                      +10  -4
paddle/parameter/FirstOrderOptimizer.h                        +1   -1
paddle/trainer/tests/picojson.h                               +2   -2
paddle/utils/DynamicLoader.cpp                                +12  -2
paddle/utils/DynamicLoader.h                                  +12  -4
cmake/cblas.cmake

@@ -44,7 +44,6 @@ if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
   message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
   set(CBLAS_FOUND ON)
   if(${MKL_LAPACK_INC_DIR})
-    add_definitions(-DPADDLE_USE_LAPACK)
     message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})")
   endif()
   return()  # return file.

@@ -80,7 +79,6 @@ if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND)
   message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
   set(CBLAS_FOUND ON)
   if(ATLAS_CLAPACK_INC_DIR)
-    add_definitions(-DPADDLE_USE_LAPACK)
     set(CBLAS_INC_DIR ${CBLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR})
     message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})")
   endif()

@@ -115,7 +113,6 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
   message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
   set(CBLAS_FOUND ON)
   if(OPENBLAS_LAPACKE_INC_DIR)
-    add_definitions(-DPADDLE_USE_LAPACK)
     message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
   endif()
   return()
cmake/external/openblas.cmake

@@ -27,35 +27,6 @@ IF(NOT ${CBLAS_FOUND})
         SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a"
             CACHE FILEPATH "openblas library" FORCE)
     ENDIF(WIN32)

-    IF(CMAKE_COMPILER_IS_GNUCC)
-        ENABLE_LANGUAGE(Fortran)
-        if(NOT CMAKE_Fortran_COMPILER_VERSION)
-            # cmake < 3.4 cannot get CMAKE_Fortran_COMPILER_VERSION directly.
-            execute_process(COMMAND ${CMAKE_Fortran_COMPILER} -dumpversion
-                            OUTPUT_VARIABLE CMAKE_Fortran_COMPILER_VERSION)
-        endif()
-        string(REGEX MATCHALL "[0-9]+" Fortran_VERSION ${CMAKE_Fortran_COMPILER_VERSION})
-        list(GET Fortran_VERSION 0 Fortran_MAJOR)
-        list(GET Fortran_VERSION 1 Fortran_MINOR)
-        find_library(GFORTRAN_LIBRARY NAMES gfortran PATHS
-            /lib
-            /usr/lib
-            /usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}.${Fortran_MINOR}/
-            /usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}/)
-        if(NOT GFORTRAN_LIBRARY)
-            message(FATAL_ERROR "Cannot found gfortran library which it is used by openblas")
-        endif()
-        find_package(Threads REQUIRED)
-        LIST(APPEND CBLAS_LIBRARIES ${GFORTRAN_LIBRARY} ${CMAKE_THREAD_LIBS_INIT})
-    ENDIF(CMAKE_COMPILER_IS_GNUCC)
-
-    IF(NOT CMAKE_Fortran_COMPILER)
-        MESSAGE(FATAL_ERROR "To build lapack in libopenblas, "
-            "you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
-    ENDIF(NOT CMAKE_Fortran_COMPILER)
-
-    ADD_DEFINITIONS(-DPADDLE_USE_LAPACK)
-
     ExternalProject_Add(
         openblas
         ${EXTERNAL_PROJECT_LOG_ARGS}

@@ -64,7 +35,7 @@ IF(NOT ${CBLAS_FOUND})
         PREFIX          ${CBLAS_SOURCES_DIR}
         INSTALL_DIR     ${CBLAS_INSTALL_DIR}
         BUILD_IN_SOURCE 1
-        BUILD_COMMAND   ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib
+        BUILD_COMMAND   ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_LAPACK=1 DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib
         INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
         UPDATE_COMMAND  ""
         CONFIGURE_COMMAND ""
paddle/cuda/CMakeLists.txt

@@ -21,16 +21,13 @@ set(CUDA_CXX_WITH_GPU_SOURCES
 if(WITH_GPU)
     set(CUDA_CXX_SOURCES
-        src/hl_dso_loader.cc
         src/hl_warpctc_wrap.cc
         ${CUDA_CXX_WITH_GPU_SOURCES})

     set_source_files_properties(${CUDA_CXX_SOURCES}
                                 PROPERTIES COMPILE_FLAGS "-D__NVCC__")
 else()
-    set(CUDA_CXX_SOURCES
-        src/hl_dso_loader.cc
-        src/hl_warpctc_wrap.cc)
+    set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc)
 endif()

 set(CUDA_CU_SOURCES

@@ -47,7 +44,6 @@ set(CUDA_CU_SOURCES
 set(CUDA_HEADERS
     include/hl_time.h
-    include/hl_dso_loader.h
     include/hl_warpctc_wrap.h
     include/hl_sequence.h
     include/hl_cuda_cublas.h
paddle/cuda/include/hl_activation_functions.h

@@ -40,18 +40,18 @@ public:
 namespace gpu {
 static __device__ Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
 static __device__ Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+}  // namespace gpu
 #else
 namespace cpu {
 static Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
 static Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+}  // namespace cpu

 #ifdef __AVX__
 namespace avx {
 static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION;
 static Active<__m256>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+}  // namespace avx
 #endif
 #endif
paddle/cuda/include/hl_cnn.h (whitespace-only re-indentation of the doc comment)

@@ -273,23 +273,23 @@ extern void hl_bilinear_forward(const real* inData,
                                 const real ratioW);

 /**
  * @brief Bilinear interpolation backward.
  *
  * @param[out] inGrad      input gradient.
  * @param[in]  inImgH      input image height.
  * @param[in]  inImgW      input image width.
  * @param[in]  inputH      input batchSize.
  * @param[in]  inputW      input image data dim.
  * @param[in]  outGrad     output gradient.
  * @param[in]  outImgH     output image height.
  * @param[in]  outImgW     output image width.
  * @param[in]  outputH     output batchSize.
  * @param[in]  outputW     output image data dim.
  * @param[in]  numChannels number of channels.
  * @param[in]  ratioH      inImgH / outImgH.
  * @param[in]  ratioW      inImgW / outImgW.
  *
  */
 extern void hl_bilinear_backward(real* inGrad,
                                  const size_t inImgH,
                                  const size_t inImgW,
paddle/cuda/src/hl_cuda_cublas.cc

@@ -14,10 +14,9 @@ limitations under the License. */
 #include "hl_cuda_cublas.h"
 #include <sys/time.h>
-#include <mutex>
 #include "hl_cuda.h"
-#include "hl_dso_loader.h"
 #include "hl_thread.ph"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Logging.h"

 namespace dynload {
paddle/cuda/src/hl_cuda_cudnn.cc

@@ -15,10 +15,9 @@ limitations under the License. */
 #include "hl_cuda_cudnn.h"
 #include <cudnn.h>
 #include <gflags/gflags.h>
-#include <mutex>
 #include "hl_cuda_cudnn.ph"
-#include "hl_dso_loader.h"
 #include "hl_thread.ph"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Logging.h"

 DEFINE_int32(cudnn_conv_workspace_limit_in_mb,
paddle/cuda/src/hl_cuda_device.cc

@@ -21,11 +21,10 @@ limitations under the License. */
 #include <sys/syscall.h>
 #include <sys/time.h>
 #include <unistd.h>
-#include <mutex>
 #include "hl_cuda.ph"
 #include "hl_thread.ph"
-#include "hl_dso_loader.h"
 #include "paddle/utils/Logging.h"
+#include "paddle/utils/DynamicLoader.h"
 // clang-format on

 namespace dynload {
paddle/cuda/src/hl_warpctc_wrap.cc

@@ -14,7 +14,7 @@ limitations under the License. */
 #include "hl_warpctc_wrap.h"
 #include <mutex>
-#include "hl_dso_loader.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Logging.h"

 namespace dynload {
paddle/function/MulOpTest.cpp (whitespace-only re-indentation of three doc comments)

@@ -74,9 +74,9 @@ TEST(MulOp, DDDMatrixMul) {
 }

 /**
  * C += A * B, B, C dense, A sparse
  * dense = sparse * dense
  */
 void testFuncDSparseDMatrix(
     size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
   real scaleT = 1.0;

@@ -119,9 +119,9 @@ TEST(MuLOp, DSparseDMul) {
 }

 /**
  * C += A * B, A, C dense, B sparse
  * dense = dense * sparse
  */
 void testFuncDDSparseMatrix(
     size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
   real scaleT = 1.0;

@@ -165,9 +165,9 @@ TEST(MulOp, DDSparseMul) {
 }

 /**
  * C += A * B, A sparse, B, C dense
  * sparse = dense * dense
  */
 void testFuncSparseDDMatrix(
     size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
   real scaleT = 1.0;
paddle/gserver/gradientmachines/GradientMachine.cpp

@@ -21,7 +21,6 @@ limitations under the License. */
 #include "MultiGradientMachine.h"
 #include "MultiNetwork.h"
 #include "NeuralNetwork.h"
-#include "NeuralNetwork.h"
 #include "ParallelNeuralNetwork.h"
 #include "hl_gpu.h"
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp (whitespace-only change to the comment's closing line)

@@ -637,7 +637,7 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
 /* create scattered id infomation for all realLayer of inFrameLines one time.
  * If hasSubseq, will also create scattered sequenceStartPositions infomation
  * for all realLayer of inFrameLines one time.
  */
 void RecurrentGradientMachine::createInFrameInfo(int inlinkId,
                                                  const Argument& input,
paddle/gserver/gradientmachines/RecurrentGradientMachine.h (whitespace-only re-indentation of two doc comments)

@@ -107,18 +107,18 @@ public:
       DropCallback;

   /**
    * @brief NormOrDropNodeCallback
    *
    * Normalize a path's probabilities or just drop it by modifying path.logProb
    *
    * The first parameter is sequence index in a batch
    *
    * The second parameter is path.ids
    *
    * The third parameter is probabilites for each node in this path.
    *
    * The fourth parameter is the probability of the whole path.
    */
   typedef std::function<void(
       int seqId, const std::vector<int>&, std::vector<real>&, real*)>
       NormOrDropNodeCallback;

@@ -348,9 +348,9 @@ protected:
   int targetInfoInlinkId_;

   /* create scattered id infomation for all realLayer of inFrameLines one time.
    * If hasSubseq, will also create scattered sequenceStartPositions infomation
    * for all realLayer of inFrameLines one time.
    */
   void createInFrameInfo(int inlinks_id,
                          const Argument& input,
                          PassType passType);
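As an aside, the NormOrDropNodeCallback documentation above maps one-to-one onto the std::function signature that follows it: (sequence index, path.ids, per-node probabilities, whole-path probability). A purely illustrative sketch of a conforming callback (the makeNormalizer factory and its lambda body are not from the commit):

    #include <functional>
    #include <vector>

    using real = float;  // Paddle's `real` is float or double depending on the build

    // Same shape as the typedef above.
    using NormOrDropNodeCallback =
        std::function<void(int, const std::vector<int>&, std::vector<real>&, real*)>;

    // Hypothetical factory: returns a callback that normalizes the per-node
    // probabilities in place and scales the whole-path probability to match.
    NormOrDropNodeCallback makeNormalizer() {
      return [](int /*seqId*/, const std::vector<int>& /*ids*/,
                std::vector<real>& nodeProbs, real* pathProb) {
        real sum = 0;
        for (real p : nodeProbs) sum += p;
        if (sum > 0) {
          for (real& p : nodeProbs) p /= sum;  // normalize each node
          *pathProb /= sum;                    // keep the path total consistent
        }
      };
    }

Dropping a path instead would set path.logProb to negative infinity, per the comment's "just drop it by modifying path.logProb".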
paddle/gserver/layers/Layer.h (whitespace-only re-indentation of two doc comments)

@@ -106,9 +106,9 @@ protected:
 public:
   /**
    * Wait until all input value ready.
    * Called before Layer::forward() function.
    */
   virtual void waitInputValue();

@@ -118,9 +118,9 @@ public:
   virtual void copyOutputToOtherDevice();

   /**
    * Wait until all output grad ready and merge them to output_.grad.
    * Called before Layer::backward() function.
    */
   virtual void waitAndMergeOutputGrad();
paddle/gserver/layers/RotateLayer.h (whitespace-only change to the comment's closing line)

@@ -29,7 +29,7 @@ namespace paddle {
  *
  * The config file api is rotate_layer
  *
  */
 class RotateLayer : public Layer {
 public:
paddle/gserver/layers/SequencePoolLayer.cpp (whitespace-only change to the comment's closing line)

@@ -60,7 +60,7 @@ void SequencePoolLayer::forward(PassType passType) {
  * thus, in this case, output_ has no sequenceStartPositions.
  * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
  * case, we should compute the new sequenceStartPositions.
  */
  if (type_) {
    CHECK(input.subSequenceStartPositions)
        << "when trans_type = seq, input must hasSubseq";
paddle/math/MathFunctions.cpp

@@ -15,6 +15,54 @@ limitations under the License. */
 #include "MathFunctions.h"
 #include "hl_matrix_apply.cuh"
 #include "hl_matrix_ops.cuh"
+#include "paddle/utils/DynamicLoader.h"
+
+namespace dynload {
+
+std::once_flag lapack_dso_flag;
+void* lapack_dso_handle = nullptr;
+
+/**
+ * The following macro definition can generate structs
+ * (for each function) to dynamic load lapack routine
+ * via operator overloading.
+ *
+ * note: default dynamic linked libs
+ */
+#define DYNAMIC_LOAD_LAPACK_WRAP(__name)                                       \
+  struct DynLoad__##__name {                                                   \
+    template <typename... Args>                                                \
+    auto operator()(Args... args) -> decltype(__name(args...)) {               \
+      using lapack_func = decltype(__name(args...)) (*)(Args...);              \
+      std::call_once(lapack_dso_flag, GetLapackDsoHandle, &lapack_dso_handle); \
+      void* p_##__name = dlsym(lapack_dso_handle, #__name);                    \
+      return reinterpret_cast<lapack_func>(p_##__name)(args...);               \
+    }                                                                          \
+  } __name;  // struct DynLoad__##__name
+
+// clang-format off
+#ifdef PADDLE_USE_ATLAS
+  #define PADDLE_SGETRF clapack_sgetrf
+  #define PADDLE_DGETRF clapack_dgetrf
+  #define PADDLE_SGETRI clapack_sgetri
+  #define PADDLE_DGETRI clapack_dgetri
+#else
+  #define PADDLE_SGETRF LAPACKE_sgetrf
+  #define PADDLE_DGETRF LAPACKE_dgetrf
+  #define PADDLE_SGETRI LAPACKE_sgetri
+  #define PADDLE_DGETRI LAPACKE_dgetri
+#endif
+
+#define LAPACK_ROUTINE_EACH(__macro) \
+  __macro(PADDLE_SGETRF)             \
+  __macro(PADDLE_DGETRF)             \
+  __macro(PADDLE_SGETRI)             \
+  __macro(PADDLE_DGETRI)
+// clang-format on
+
+LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP)
+
+}  // namespace dynload
+
 namespace paddle {

@@ -85,16 +133,7 @@ int getrf<float>(const CBLAS_ORDER order,
                  float* A,
                  const int lda,
                  int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-  return clapack_sgetrf(order, M, N, A, lda, ipiv);
-#else
-  return LAPACKE_sgetrf(order, M, N, A, lda, ipiv);
-#endif
-#else
-  LOG(FATAL) << "Not implemented";
-#endif
-  return 0;
+  return dynload::PADDLE_SGETRF(order, M, N, A, lda, ipiv);
 }

 template <>

@@ -104,16 +143,7 @@ int getrf<double>(const CBLAS_ORDER order,
                  double* A,
                  const int lda,
                  int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-  return clapack_dgetrf(order, M, N, A, lda, ipiv);
-#else
-  return LAPACKE_dgetrf(order, M, N, A, lda, ipiv);
-#endif
-#else
-  LOG(FATAL) << "Not implemented";
-#endif
-  return 0;
+  return dynload::PADDLE_DGETRF(order, M, N, A, lda, ipiv);
 }

 template <>

@@ -122,16 +152,7 @@ int getri<float>(const CBLAS_ORDER order,
                  float* A,
                  const int lda,
                  const int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-  return clapack_sgetri(order, N, A, lda, ipiv);
-#else
-  return LAPACKE_sgetri(order, N, A, lda, ipiv);
-#endif
-#else
-  LOG(FATAL) << "Not implemented";
-#endif
-  return 0;
+  return dynload::PADDLE_SGETRI(order, N, A, lda, ipiv);
 }

 template <>

@@ -140,15 +161,7 @@ int getri<double>(const CBLAS_ORDER order,
                  double* A,
                  const int lda,
                  const int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-  return clapack_dgetri(order, N, A, lda, ipiv);
-#else
-  return LAPACKE_dgetri(order, N, A, lda, ipiv);
-#endif
-#else
-  LOG(FATAL) << "Not implemented";
-#endif
+  return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv);
   return 0;
 }
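The DYNAMIC_LOAD_LAPACK_WRAP macro above is the heart of the change: each LAPACK entry point becomes a functor that opens liblapack once and resolves the real symbol by name before forwarding its arguments. A rough, hand-expanded sketch of what DYNAMIC_LOAD_LAPACK_WRAP(LAPACKE_sgetrf) generates, with a simplified loader standing in for GetLapackDsoHandle and the return type written out as int rather than recovered via decltype (both simplifications are mine, not from the commit):

    #include <dlfcn.h>  // dlopen, dlsym
    #include <mutex>    // std::once_flag, std::call_once

    static std::once_flag lapack_dso_flag;
    static void* lapack_dso_handle = nullptr;

    // Hypothetical stand-in for GetLapackDsoHandle(): open liblapack once.
    static void LoadLapackOnce(void** handle) {
      *handle = dlopen("liblapack.so", RTLD_LAZY);
    }

    // Hand-expansion of DYNAMIC_LOAD_LAPACK_WRAP(LAPACKE_sgetrf): a functor
    // instance whose name shadows the routine, so call sites compile unchanged.
    struct DynLoad__LAPACKE_sgetrf {
      template <typename... Args>
      int operator()(Args... args) {
        using lapack_func = int (*)(Args...);
        // Open the library exactly once, thread-safely.
        std::call_once(lapack_dso_flag, LoadLapackOnce, &lapack_dso_handle);
        // Resolve the symbol by its stringized name on every call.
        void* p = dlsym(lapack_dso_handle, "LAPACKE_sgetrf");
        return reinterpret_cast<lapack_func>(p)(args...);
      }
    } LAPACKE_sgetrf;

A call such as LAPACKE_sgetrf(order, m, n, a, lda, ipiv) thus pays one dlopen on first use and a dlsym per invocation. Note that, like the sketch, the real macro performs no null check on the dlsym result, so a missing routine surfaces as a crash at the call site.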
paddle/math/MathFunctions.h

@@ -17,14 +17,11 @@ limitations under the License. */
 #ifdef PADDLE_USE_MKL
 #include <mkl.h>
-#ifdef PADDLE_USE_LAPACK
 #include <mkl_lapacke.h>
-#endif
 #else
 extern "C" {
 #include <cblas.h>
 }
-#ifdef PADDLE_USE_LAPACK
 #ifdef PADDLE_USE_ATLAS
 extern "C" {
 #include <clapack.h>

@@ -33,7 +30,6 @@ extern "C" {
 #include <lapacke.h>
 #endif
-#endif
 #endif

 #include <cmath>
paddle/math/tests/TestUtils.h (whitespace-only change to the comment's closing line)

@@ -37,7 +37,7 @@ limitations under the License. */
  *
  * AutoCompare test;
  * test.cmpWithoutArg<I...>(function, height, width)
  */

 #include <gtest/gtest.h>
 #include "TensorCheck.h"
paddle/math/tests/test_matrixCompare.cpp

@@ -21,6 +21,7 @@ limitations under the License. */
 #include "paddle/math/Matrix.h"
 #include "paddle/math/SparseMatrix.h"
 #include "paddle/testing/TestUtil.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Stat.h"
 #include "paddle/utils/Util.h"

@@ -235,10 +236,15 @@ TEST(Matrix, unary) {
       testMatrixTranspose(height, width);
       testMatrixRotate(height, width);
     }
-// inverse
-#ifdef PADDLE_USE_LAPACK
-    testMatrixInverse(height);
-#endif
+    // inverse matrix
+    void** dso_handler = nullptr;
+    GetLapackDsoHandle(dso_handler);
+    if (nullptr == *dso_handler) {
+      LOG(WARNING) << "Failed to find liblapack.so, please specify its path "
+                      "using LD_LIBRARY_PATH.";
+    } else {
+      testMatrixInverse(height);
+    }
   }
 }
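One detail worth flagging: as committed, the probe above initializes dso_handler as a null void** and then dereferences it, so the check can never be reached safely. A sketch of the same probe with the handle passed by address instead (runInverseTestIfLapackPresent is a hypothetical wrapper; testMatrixInverse is the helper defined earlier in this test file, and the sketch assumes the loader leaves the handle null when liblapack cannot be found, which is what the check implies):

    #include "paddle/utils/DynamicLoader.h"
    #include "paddle/utils/Logging.h"

    void runInverseTestIfLapackPresent(size_t height) {
      void* dso_handle = nullptr;      // a void*, passed by address below
      GetLapackDsoHandle(&dso_handle);
      if (dso_handle == nullptr) {
        LOG(WARNING) << "Failed to find liblapack.so, please specify its path "
                        "using LD_LIBRARY_PATH.";
      } else {
        testMatrixInverse(height);     // liblapack is loadable; run the test
      }
    }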
paddle/parameter/FirstOrderOptimizer.h (whitespace-only change to the comment's closing line)

@@ -126,7 +126,7 @@ protected:
 /*
  * AdaDelta Optimization.
  * http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf
  */
 class AdaDeltaParameterOptimizer : public ParameterOptimizer {
 public:
   explicit AdaDeltaParameterOptimizer(const OptimizationConfig& optConfig)
paddle/trainer/tests/picojson.h

@@ -1059,14 +1059,14 @@ inline bool operator==(const value& x, const value& y) {
 }
 inline bool operator!=(const value& x, const value& y) { return !(x == y); }
-}
+}  // namespace picojson

 namespace std {
 template <>
 inline void swap(picojson::value& x, picojson::value& y) {
   x.swap(y);
 }
-}
+}  // namespace std

 inline std::istream& operator>>(std::istream& is, picojson::value& x) {
   picojson::set_last_error(std::string());
paddle/cuda/src/hl_dso_loader.cc → paddle/utils/DynamicLoader.cpp

@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "hl_dso_loader.h"
+#include "DynamicLoader.h"
 #include <gflags/gflags.h>
-#include "paddle/utils/Logging.h"
+#include "Logging.h"

 DEFINE_string(cudnn_dir,
               "",

@@ -30,6 +30,8 @@ DEFINE_string(cuda_dir,
 DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so.");

+DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so.");
+
 static inline std::string join(const std::string& part1,
                                const std::string& part2) {
   // directory separator

@@ -160,3 +162,11 @@ void GetWarpCTCDsoHandle(void** dso_handle) {
   GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so", dso_handle);
 #endif
 }
+
+void GetLapackDsoHandle(void** dso_handle) {
+#if defined(__APPLE__) || defined(__OSX__)
+  GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapack.dylib", dso_handle);
+#else
+  GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapack.so", dso_handle);
+#endif
+}
paddle/cuda/include/hl_dso_loader.h → paddle/utils/DynamicLoader.h

@@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#ifndef HL_DSO_LOADER_H_
-#define HL_DSO_LOADER_H_
+#ifndef DYNAMIC_LOAD_H_
+#define DYNAMIC_LOAD_H_

 #include <dlfcn.h>
 #include <memory>
 #include <mutex>
 #include <string>
-#include "hl_base.h"

 /**
  * @brief load the DSO of CUBLAS

@@ -52,4 +52,12 @@ void GetCurandDsoHandle(void** dso_handle);
  */
 void GetWarpCTCDsoHandle(void** dso_handle);

-#endif  // HL_DSO_LOADER_H_
+/**
+ * @brief load the DSO of lapack
+ *
+ * @param **dso_handle   dso handler
+ *
+ */
+void GetLapackDsoHandle(void** dso_handle);
+
+#endif  // DYNAMIC_LOAD_H_
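For routines the dynload wrappers do not cover, the loader API declared above composes directly with dlsym. A minimal caller-side sketch (the symbol name, its signature, and the search behavior are assumptions based on this header and on DynamicLoader.cpp above, which consults FLAGS_lapack_dir before the default library paths):

    #include <dlfcn.h>
    #include "paddle/utils/DynamicLoader.h"

    // Resolve and call LAPACKE_sgetrf by hand through the shared loader.
    int callSgetrfDynamically(int order, int m, int n, float* a, int lda, int* ipiv) {
      void* handle = nullptr;
      GetLapackDsoHandle(&handle);  // opens liblapack.so (liblapack.dylib on macOS)
      using sgetrf_t = int (*)(int, int, int, float*, int, int*);
      auto fn = reinterpret_cast<sgetrf_t>(dlsym(handle, "LAPACKE_sgetrf"));
      return fn ? fn(order, m, n, a, lda, ipiv) : -1;  // -1: symbol not found
    }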