Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
ccea3b02
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ccea3b02
编写于
11月 20, 2016
作者:
L
liaogang
提交者:
Yu Yang
11月 22, 2016
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add style check for *.cc files in cuda directory
上级
5d511a16
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
112 addition
and
132 deletion
+112
-132
paddle/cuda/CMakeLists.txt
paddle/cuda/CMakeLists.txt
+5
-2
paddle/cuda/src/hl_cuda_cublas.cc
paddle/cuda/src/hl_cuda_cublas.cc
+7
-8
paddle/cuda/src/hl_cuda_cudnn.cc
paddle/cuda/src/hl_cuda_cudnn.cc
+64
-79
paddle/cuda/src/hl_cuda_device.cc
paddle/cuda/src/hl_cuda_device.cc
+7
-5
paddle/cuda/src/hl_cudart_wrap.cc
paddle/cuda/src/hl_cudart_wrap.cc
+8
-18
paddle/cuda/src/hl_dso_loader.cc
paddle/cuda/src/hl_dso_loader.cc
+21
-20
未找到文件。
paddle/cuda/CMakeLists.txt
浏览文件 @
ccea3b02
...
@@ -81,5 +81,8 @@ else()
...
@@ -81,5 +81,8 @@ else()
add_library
(
paddle_cuda
${
CUDA_SOURCES
}
)
add_library
(
paddle_cuda
${
CUDA_SOURCES
}
)
endif
()
endif
()
add_style_check_target
(
paddle_cuda
${
CUDA_SOURCES
}
)
add_style_check_target
(
paddle_cuda
add_style_check_target
(
paddle_cuda
${
CUDA_HEADERS
}
)
${
CUDA_SOURCES
}
${
CUDA_HEADERS
}
${
CUDA_DSO_SOURCES
}
${
CUDA_CXX_WITH_GPU_SOURCES
}
)
paddle/cuda/src/hl_cuda_cublas.cc
浏览文件 @
ccea3b02
...
@@ -104,7 +104,7 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP)
...
@@ -104,7 +104,7 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP)
#endif
#endif
const
char
*
hl_cublas_get_error_string
(
cublasStatus_t
status
)
{
const
char
*
hl_cublas_get_error_string
(
cublasStatus_t
status
)
{
switch
(
status
)
{
switch
(
status
)
{
case
CUBLAS_STATUS_NOT_INITIALIZED
:
case
CUBLAS_STATUS_NOT_INITIALIZED
:
return
"[cublas status]: not initialized"
;
return
"[cublas status]: not initialized"
;
case
CUBLAS_STATUS_ALLOC_FAILED
:
case
CUBLAS_STATUS_ALLOC_FAILED
:
...
@@ -181,7 +181,7 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
...
@@ -181,7 +181,7 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
real
**
inout_d
=
(
real
**
)
hl_malloc_device
(
sizeof
(
real
*
));
real
**
inout_d
=
(
real
**
)
hl_malloc_device
(
sizeof
(
real
*
));
hl_memcpy
(
inout_d
,
inout_h
,
sizeof
(
real
*
));
hl_memcpy
(
inout_d
,
inout_h
,
sizeof
(
real
*
));
int
*
pivot_d
=
(
int
*
)
hl_malloc_device
(
dimN
*
sizeof
(
int
));
int
*
pivot_d
=
(
int
*
)
hl_malloc_device
(
dimN
*
sizeof
(
int
));
int
*
info_d
=
(
int
*
)
t_resource
.
gpu_mem
;
int
*
info_d
=
(
int
*
)
t_resource
.
gpu_mem
;
/* Note: cublasSgetrfBatched is used to calculate a number of
/* Note: cublasSgetrfBatched is used to calculate a number of
...
@@ -189,10 +189,9 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
...
@@ -189,10 +189,9 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
the API for better performance.
the API for better performance.
*/
*/
CHECK_CUBLAS
(
CUBLAS_GETRF
(
t_resource
.
handle
,
CHECK_CUBLAS
(
CUBLAS_GETRF
(
t_resource
.
handle
,
dimN
,
inout_d
,
lda
,
pivot_d
,
dimN
,
inout_d
,
lda
,
pivot_d
,
info_d
,
1
));
info_d
,
1
));
int
info_h
;
int
info_h
;
hl_memcpy
(
&
info_h
,
info_d
,
sizeof
(
int
));
hl_memcpy
(
&
info_h
,
info_d
,
sizeof
(
int
));
if
(
info_h
!=
0
)
{
if
(
info_h
!=
0
)
{
LOG
(
FATAL
)
<<
"Factorization of matrix failed: matrix may be singular.
\n
"
;
LOG
(
FATAL
)
<<
"Factorization of matrix failed: matrix may be singular.
\n
"
;
...
@@ -204,8 +203,8 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
...
@@ -204,8 +203,8 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
hl_memcpy
(
out_d
,
out_h
,
sizeof
(
real
*
));
hl_memcpy
(
out_d
,
out_h
,
sizeof
(
real
*
));
CHECK_CUBLAS
(
CUBLAS_GETRI
(
t_resource
.
handle
,
CHECK_CUBLAS
(
CUBLAS_GETRI
(
t_resource
.
handle
,
dimN
,
(
const
real
**
)
inout_d
,
lda
,
pivot_d
,
dimN
,
(
const
real
**
)
inout_d
,
lda
,
pivot_d
,
out_d
,
ldc
,
info_d
,
1
));
out_d
,
ldc
,
info_d
,
1
));
hl_memcpy
(
&
info_h
,
info_d
,
sizeof
(
int
));
hl_memcpy
(
&
info_h
,
info_d
,
sizeof
(
int
));
if
(
info_h
!=
0
)
{
if
(
info_h
!=
0
)
{
...
@@ -215,7 +214,7 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
...
@@ -215,7 +214,7 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
hl_free_mem_device
(
inout_d
);
hl_free_mem_device
(
inout_d
);
hl_free_mem_device
(
pivot_d
);
hl_free_mem_device
(
pivot_d
);
hl_free_mem_device
(
out_d
);
hl_free_mem_device
(
out_d
);
CHECK_SYNC
(
"hl_matrix_inverse failed"
);
CHECK_SYNC
(
"hl_matrix_inverse failed"
);
}
}
...
...
paddle/cuda/src/hl_cuda_cudnn.cc
浏览文件 @
ccea3b02
...
@@ -159,13 +159,11 @@ CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP)
...
@@ -159,13 +159,11 @@ CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP)
bool
g_is_libcudnn_init
=
false
;
bool
g_is_libcudnn_init
=
false
;
int
g_cudnn_lib_version
=
0
;
int
g_cudnn_lib_version
=
0
;
void
hl_cudnn_desc_init
(
cudnnTensorDescriptor_t
*
cudnn_desc
)
void
hl_cudnn_desc_init
(
cudnnTensorDescriptor_t
*
cudnn_desc
)
{
{
CHECK_CUDNN
(
dynload
::
cudnnCreateTensorDescriptor
(
cudnn_desc
));
CHECK_CUDNN
(
dynload
::
cudnnCreateTensorDescriptor
(
cudnn_desc
));
}
}
void
hl_cudnn_init
(
cudnnHandle_t
*
cudnn_handle
,
cudaStream_t
stream
)
void
hl_cudnn_init
(
cudnnHandle_t
*
cudnn_handle
,
cudaStream_t
stream
)
{
{
size_t
cudnn_dso_ver
=
dynload
::
cudnnGetVersion
();
size_t
cudnn_dso_ver
=
dynload
::
cudnnGetVersion
();
size_t
cudnn_dso_major
=
cudnn_dso_ver
/
1000
;
size_t
cudnn_dso_major
=
cudnn_dso_ver
/
1000
;
size_t
cudnn_cuh_major
=
CUDNN_VERSION
/
1000
;
size_t
cudnn_cuh_major
=
CUDNN_VERSION
/
1000
;
...
@@ -212,13 +210,18 @@ void hl_conv_workspace(hl_tensor_descriptor input,
...
@@ -212,13 +210,18 @@ void hl_conv_workspace(hl_tensor_descriptor input,
CHECK_NOTNULL
(
conv
);
CHECK_NOTNULL
(
conv
);
// Specify workspace limit directly
// Specify workspace limit directly
size_t
memoryLimitBytes
=
(
1LL
<<
20
)
*
FLAGS_cudnn_conv_workspace_limit_in_mb
;
size_t
memoryLimitBytes
=
(
1LL
<<
20
)
*
FLAGS_cudnn_conv_workspace_limit_in_mb
;
// cudnn convolution forward configuration
// cudnn convolution forward configuration
cudnnTensorDescriptor_t
fwd_src_desc
=
GET_TENSOR_DESCRIPTOR
(
input
);
cudnnTensorDescriptor_t
fwd_src_desc
=
cudnnTensorDescriptor_t
fwd_dest_desc
=
GET_TENSOR_DESCRIPTOR
(
output
);
GET_TENSOR_DESCRIPTOR
(
input
);
cudnnFilterDescriptor_t
fwd_filter_desc
=
GET_FILTER_DESCRIPTOR
(
filter
);
cudnnTensorDescriptor_t
fwd_dest_desc
=
cudnnConvolutionDescriptor_t
fwd_conv_desc
=
GET_CONVOLUTION_DESCRIPTOR
(
conv
);
GET_TENSOR_DESCRIPTOR
(
output
);
cudnnFilterDescriptor_t
fwd_filter_desc
=
GET_FILTER_DESCRIPTOR
(
filter
);
cudnnConvolutionDescriptor_t
fwd_conv_desc
=
GET_CONVOLUTION_DESCRIPTOR
(
conv
);
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionForwardAlgorithm
(
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionForwardAlgorithm
(
t_resource
.
cudnn_handle
,
t_resource
.
cudnn_handle
,
...
@@ -250,23 +253,23 @@ void hl_conv_workspace(hl_tensor_descriptor input,
...
@@ -250,23 +253,23 @@ void hl_conv_workspace(hl_tensor_descriptor input,
GET_CONVOLUTION_DESCRIPTOR
(
conv
);
GET_CONVOLUTION_DESCRIPTOR
(
conv
);
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardDataAlgorithm
(
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardDataAlgorithm
(
t_resource
.
cudnn_handle
,
t_resource
.
cudnn_handle
,
bwd_data_filter_desc
,
bwd_data_filter_desc
,
bwd_data_diff_desc
,
bwd_data_diff_desc
,
bwd_data_conv_desc
,
bwd_data_conv_desc
,
bwd_data_grad_desc
,
bwd_data_grad_desc
,
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
,
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
,
memoryLimitBytes
,
memoryLimitBytes
,
reinterpret_cast
<
cudnnConvolutionBwdDataAlgo_t
*>
(
convBwdDataAlgo
)));
reinterpret_cast
<
cudnnConvolutionBwdDataAlgo_t
*>
(
convBwdDataAlgo
)));
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardDataWorkspaceSize
(
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardDataWorkspaceSize
(
t_resource
.
cudnn_handle
,
t_resource
.
cudnn_handle
,
bwd_data_filter_desc
,
bwd_data_filter_desc
,
bwd_data_diff_desc
,
bwd_data_diff_desc
,
bwd_data_conv_desc
,
bwd_data_conv_desc
,
bwd_data_grad_desc
,
bwd_data_grad_desc
,
static_cast
<
cudnnConvolutionBwdDataAlgo_t
>
(
*
convBwdDataAlgo
),
static_cast
<
cudnnConvolutionBwdDataAlgo_t
>
(
*
convBwdDataAlgo
),
bwdDataLimitBytes
));
bwdDataLimitBytes
));
// cudnn convolution backward filter configuration
// cudnn convolution backward filter configuration
cudnnTensorDescriptor_t
bwd_filter_src_desc
=
cudnnTensorDescriptor_t
bwd_filter_src_desc
=
...
@@ -279,21 +282,21 @@ void hl_conv_workspace(hl_tensor_descriptor input,
...
@@ -279,21 +282,21 @@ void hl_conv_workspace(hl_tensor_descriptor input,
GET_FILTER_DESCRIPTOR
(
filter
);
GET_FILTER_DESCRIPTOR
(
filter
);
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardFilterAlgorithm
(
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardFilterAlgorithm
(
t_resource
.
cudnn_handle
,
t_resource
.
cudnn_handle
,
bwd_filter_src_desc
,
bwd_filter_src_desc
,
bwd_filter_diff_desc
,
bwd_filter_diff_desc
,
bwd_filter_conv_desc
,
bwd_filter_conv_desc
,
bwd_filter_grad_desc
,
bwd_filter_grad_desc
,
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
,
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
,
memoryLimitBytes
,
memoryLimitBytes
,
reinterpret_cast
<
cudnnConvolutionBwdFilterAlgo_t
*>
(
convBwdFilterAlgo
)));
reinterpret_cast
<
cudnnConvolutionBwdFilterAlgo_t
*>
(
convBwdFilterAlgo
)));
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardFilterWorkspaceSize
(
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardFilterWorkspaceSize
(
t_resource
.
cudnn_handle
,
bwd_filter_src_desc
,
t_resource
.
cudnn_handle
,
bwd_filter_src_desc
,
bwd_filter_diff_desc
,
bwd_filter_conv_desc
,
bwd_filter_diff_desc
,
bwd_filter_conv_desc
,
bwd_filter_grad_desc
,
bwd_filter_grad_desc
,
static_cast
<
cudnnConvolutionBwdFilterAlgo_t
>
(
*
convBwdFilterAlgo
),
static_cast
<
cudnnConvolutionBwdFilterAlgo_t
>
(
*
convBwdFilterAlgo
),
bwdFilterLimitBytes
));
bwdFilterLimitBytes
));
#endif
#endif
}
}
...
@@ -302,8 +305,7 @@ void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc,
...
@@ -302,8 +305,7 @@ void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc,
int
batch_size
,
int
batch_size
,
int
feature_maps
,
int
feature_maps
,
int
height
,
int
height
,
int
width
)
int
width
)
{
{
CHECK_NOTNULL
(
image_desc
);
CHECK_NOTNULL
(
image_desc
);
cudnn_tensor_descriptor
hl_desc
=
cudnn_tensor_descriptor
hl_desc
=
...
@@ -359,8 +361,7 @@ void hl_tensor_reshape(hl_tensor_descriptor image_desc,
...
@@ -359,8 +361,7 @@ void hl_tensor_reshape(hl_tensor_descriptor image_desc,
int
batch_size
,
int
batch_size
,
int
feature_maps
,
int
feature_maps
,
int
height
,
int
height
,
int
width
)
int
width
)
{
{
const
int
stride_w
=
1
;
const
int
stride_w
=
1
;
const
int
stride_h
=
width
*
stride_w
;
const
int
stride_h
=
width
*
stride_w
;
const
int
stride_c
=
height
*
stride_h
;
const
int
stride_c
=
height
*
stride_h
;
...
@@ -384,8 +385,7 @@ void hl_tensor_reshape(hl_tensor_descriptor image_desc,
...
@@ -384,8 +385,7 @@ void hl_tensor_reshape(hl_tensor_descriptor image_desc,
int
nStride
,
int
nStride
,
int
cStride
,
int
cStride
,
int
hStride
,
int
hStride
,
int
wStride
)
int
wStride
)
{
{
CHECK_NOTNULL
(
image_desc
);
CHECK_NOTNULL
(
image_desc
);
cudnn_tensor_descriptor
hl_desc
=
(
cudnn_tensor_descriptor
)
image_desc
;
cudnn_tensor_descriptor
hl_desc
=
(
cudnn_tensor_descriptor
)
image_desc
;
...
@@ -408,8 +408,7 @@ void hl_tensor_reshape(hl_tensor_descriptor image_desc,
...
@@ -408,8 +408,7 @@ void hl_tensor_reshape(hl_tensor_descriptor image_desc,
hl_desc
->
width
=
width
;
hl_desc
->
width
=
width
;
}
}
void
hl_destroy_tensor_descriptor
(
hl_tensor_descriptor
image_desc
)
void
hl_destroy_tensor_descriptor
(
hl_tensor_descriptor
image_desc
)
{
{
CHECK_NOTNULL
(
image_desc
);
CHECK_NOTNULL
(
image_desc
);
cudnn_tensor_descriptor
hl_desc
=
(
cudnn_tensor_descriptor
)
image_desc
;
cudnn_tensor_descriptor
hl_desc
=
(
cudnn_tensor_descriptor
)
image_desc
;
...
@@ -430,11 +429,9 @@ void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc,
...
@@ -430,11 +429,9 @@ void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc,
int
height_padding
,
int
height_padding
,
int
width_padding
,
int
width_padding
,
int
stride_height
,
int
stride_height
,
int
stride_width
)
int
stride_width
)
{
{
cudnnPoolingMode_t
cudnn_mode
;
cudnnPoolingMode_t
cudnn_mode
;
switch
(
mode
)
switch
(
mode
)
{
{
case
HL_POOLING_MAX
:
case
HL_POOLING_MAX
:
cudnn_mode
=
CUDNN_POOLING_MAX
;
cudnn_mode
=
CUDNN_POOLING_MAX
;
break
;
break
;
...
@@ -478,13 +475,13 @@ void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc,
...
@@ -478,13 +475,13 @@ void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc,
*
pooling_desc
=
(
hl_pooling_descriptor
)
hl_pooling_desc
;
*
pooling_desc
=
(
hl_pooling_descriptor
)
hl_pooling_desc
;
}
}
void
hl_destroy_pooling_descriptor
(
hl_pooling_descriptor
pooling_desc
)
void
hl_destroy_pooling_descriptor
(
hl_pooling_descriptor
pooling_desc
)
{
{
CHECK_NOTNULL
(
pooling_desc
);
CHECK_NOTNULL
(
pooling_desc
);
cudnn_pooling_descriptor
hl_pooling
=
(
cudnn_pooling_descriptor
)
pooling_desc
;
cudnn_pooling_descriptor
hl_pooling
=
CHECK_NOTNULL
(
hl_pooling
->
desc
)
;
(
cudnn_pooling_descriptor
)
pooling_desc
;
CHECK_NOTNULL
(
hl_pooling
->
desc
);
CHECK_CUDNN
(
dynload
::
cudnnDestroyPoolingDescriptor
(
hl_pooling
->
desc
));
CHECK_CUDNN
(
dynload
::
cudnnDestroyPoolingDescriptor
(
hl_pooling
->
desc
));
hl_pooling
->
desc
=
NULL
;
hl_pooling
->
desc
=
NULL
;
...
@@ -496,8 +493,7 @@ void hl_pooling_forward(hl_tensor_descriptor input,
...
@@ -496,8 +493,7 @@ void hl_pooling_forward(hl_tensor_descriptor input,
real
*
input_image
,
real
*
input_image
,
hl_tensor_descriptor
output
,
hl_tensor_descriptor
output
,
real
*
output_image
,
real
*
output_image
,
hl_pooling_descriptor
pooling
)
hl_pooling_descriptor
pooling
)
{
{
cudnnPoolingDescriptor_t
pooling_desc
;
cudnnPoolingDescriptor_t
pooling_desc
;
cudnnTensorDescriptor_t
input_desc
;
cudnnTensorDescriptor_t
input_desc
;
cudnnTensorDescriptor_t
output_desc
;
cudnnTensorDescriptor_t
output_desc
;
...
@@ -531,8 +527,7 @@ void hl_pooling_backward(hl_tensor_descriptor input,
...
@@ -531,8 +527,7 @@ void hl_pooling_backward(hl_tensor_descriptor input,
hl_tensor_descriptor
output
,
hl_tensor_descriptor
output
,
real
*
output_image
,
real
*
output_image
,
real
*
output_image_grad
,
real
*
output_image_grad
,
hl_pooling_descriptor
pooling
)
hl_pooling_descriptor
pooling
)
{
{
cudnnPoolingDescriptor_t
pooling_desc
;
cudnnPoolingDescriptor_t
pooling_desc
;
cudnnTensorDescriptor_t
input_desc
;
cudnnTensorDescriptor_t
input_desc
;
cudnnTensorDescriptor_t
output_desc
;
cudnnTensorDescriptor_t
output_desc
;
...
@@ -571,8 +566,7 @@ void hl_create_filter_descriptor(hl_filter_descriptor* filter,
...
@@ -571,8 +566,7 @@ void hl_create_filter_descriptor(hl_filter_descriptor* filter,
int
input_feature_maps
,
int
input_feature_maps
,
int
output_feature_maps
,
int
output_feature_maps
,
int
height
,
int
height
,
int
width
)
int
width
)
{
{
CHECK_NOTNULL
(
filter
);
CHECK_NOTNULL
(
filter
);
cudnn_filter_descriptor
hl_filter
=
cudnn_filter_descriptor
hl_filter
=
...
@@ -607,8 +601,7 @@ void hl_create_filter_descriptor(hl_filter_descriptor* filter,
...
@@ -607,8 +601,7 @@ void hl_create_filter_descriptor(hl_filter_descriptor* filter,
}
}
void
hl_destroy_filter_descriptor
(
hl_filter_descriptor
filter
)
void
hl_destroy_filter_descriptor
(
hl_filter_descriptor
filter
)
{
{
CHECK_NOTNULL
(
filter
);
CHECK_NOTNULL
(
filter
);
cudnn_filter_descriptor
hl_filter
=
(
cudnn_filter_descriptor
)
filter
;
cudnn_filter_descriptor
hl_filter
=
(
cudnn_filter_descriptor
)
filter
;
...
@@ -627,14 +620,13 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
...
@@ -627,14 +620,13 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
int
padding_height
,
int
padding_height
,
int
padding_width
,
int
padding_width
,
int
stride_height
,
int
stride_height
,
int
stride_width
)
int
stride_width
)
{
{
CHECK_NOTNULL
(
conv
);
CHECK_NOTNULL
(
conv
);
cudnn_convolution_descriptor
hl_conv
=
cudnn_convolution_descriptor
hl_conv
=
(
cudnn_convolution_descriptor
)
(
cudnn_convolution_descriptor
)
malloc
(
sizeof
(
_cudnn_convolution_descriptor
));
malloc
(
sizeof
(
_cudnn_convolution_descriptor
));
CHECK_NOTNULL
(
hl_conv
);
CHECK_NOTNULL
(
hl_conv
);
CHECK_CUDNN
(
dynload
::
cudnnCreateConvolutionDescriptor
(
&
hl_conv
->
desc
));
CHECK_CUDNN
(
dynload
::
cudnnCreateConvolutionDescriptor
(
&
hl_conv
->
desc
));
cudnnConvolutionMode_t
mode
=
CUDNN_CROSS_CORRELATION
;
cudnnConvolutionMode_t
mode
=
CUDNN_CROSS_CORRELATION
;
...
@@ -667,8 +659,7 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
...
@@ -667,8 +659,7 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
int
padding_height
,
int
padding_height
,
int
padding_width
,
int
padding_width
,
int
stride_height
,
int
stride_height
,
int
stride_width
)
int
stride_width
)
{
{
CHECK_NOTNULL
(
conv
);
CHECK_NOTNULL
(
conv
);
CHECK_NOTNULL
(
image
);
CHECK_NOTNULL
(
image
);
CHECK_NOTNULL
(
filter
);
CHECK_NOTNULL
(
filter
);
...
@@ -697,8 +688,7 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
...
@@ -697,8 +688,7 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
hl_conv
->
mode
=
mode
;
hl_conv
->
mode
=
mode
;
}
}
void
hl_destroy_convolution_descriptor
(
hl_convolution_descriptor
conv
)
void
hl_destroy_convolution_descriptor
(
hl_convolution_descriptor
conv
)
{
{
CHECK_NOTNULL
(
conv
);
CHECK_NOTNULL
(
conv
);
cudnn_convolution_descriptor
hl_conv
=
(
cudnn_convolution_descriptor
)
conv
;
cudnn_convolution_descriptor
hl_conv
=
(
cudnn_convolution_descriptor
)
conv
;
...
@@ -753,8 +743,7 @@ void hl_convolution_forward(hl_tensor_descriptor input,
...
@@ -753,8 +743,7 @@ void hl_convolution_forward(hl_tensor_descriptor input,
void
hl_convolution_forward_add_bias
(
hl_tensor_descriptor
bias
,
void
hl_convolution_forward_add_bias
(
hl_tensor_descriptor
bias
,
real
*
bias_data
,
real
*
bias_data
,
hl_tensor_descriptor
output
,
hl_tensor_descriptor
output
,
real
*
output_data
)
real
*
output_data
)
{
{
CHECK_NOTNULL
(
bias
);
CHECK_NOTNULL
(
bias
);
CHECK_NOTNULL
(
output
);
CHECK_NOTNULL
(
output
);
CHECK_NOTNULL
(
bias_data
);
CHECK_NOTNULL
(
bias_data
);
...
@@ -782,8 +771,7 @@ void hl_convolution_forward_add_bias(hl_tensor_descriptor bias,
...
@@ -782,8 +771,7 @@ void hl_convolution_forward_add_bias(hl_tensor_descriptor bias,
void
hl_convolution_backward_bias
(
hl_tensor_descriptor
bias
,
void
hl_convolution_backward_bias
(
hl_tensor_descriptor
bias
,
real
*
bias_grad_data
,
real
*
bias_grad_data
,
hl_tensor_descriptor
output
,
hl_tensor_descriptor
output
,
real
*
output_grad_data
)
real
*
output_grad_data
)
{
{
CHECK_NOTNULL
(
bias
);
CHECK_NOTNULL
(
bias
);
CHECK_NOTNULL
(
output
);
CHECK_NOTNULL
(
output
);
CHECK_NOTNULL
(
bias_grad_data
);
CHECK_NOTNULL
(
bias_grad_data
);
...
@@ -814,7 +802,6 @@ void hl_convolution_backward_filter(hl_tensor_descriptor input,
...
@@ -814,7 +802,6 @@ void hl_convolution_backward_filter(hl_tensor_descriptor input,
void
*
gpuWorkSpace
,
void
*
gpuWorkSpace
,
size_t
sizeInBytes
,
size_t
sizeInBytes
,
int
convBwdFilterAlgo
)
{
int
convBwdFilterAlgo
)
{
CHECK_NOTNULL
(
input
);
CHECK_NOTNULL
(
input
);
CHECK_NOTNULL
(
output
);
CHECK_NOTNULL
(
output
);
CHECK_NOTNULL
(
filter
);
CHECK_NOTNULL
(
filter
);
...
@@ -889,8 +876,7 @@ void hl_convolution_backward_data(hl_tensor_descriptor input,
...
@@ -889,8 +876,7 @@ void hl_convolution_backward_data(hl_tensor_descriptor input,
void
hl_softmax_forward
(
real
*
input
,
void
hl_softmax_forward
(
real
*
input
,
real
*
output
,
real
*
output
,
int
height
,
int
height
,
int
width
)
int
width
)
{
{
#ifndef PADDLE_TYPE_DOUBLE
#ifndef PADDLE_TYPE_DOUBLE
cudnnDataType_t
data_type
=
CUDNN_DATA_FLOAT
;
cudnnDataType_t
data_type
=
CUDNN_DATA_FLOAT
;
#else
#else
...
@@ -923,8 +909,7 @@ void hl_softmax_forward(real *input,
...
@@ -923,8 +909,7 @@ void hl_softmax_forward(real *input,
void
hl_softmax_backward
(
real
*
output_value
,
void
hl_softmax_backward
(
real
*
output_value
,
real
*
output_grad
,
real
*
output_grad
,
int
height
,
int
height
,
int
width
)
int
width
)
{
{
#ifndef PADDLE_TYPE_DOUBLE
#ifndef PADDLE_TYPE_DOUBLE
cudnnDataType_t
data_type
=
CUDNN_DATA_FLOAT
;
cudnnDataType_t
data_type
=
CUDNN_DATA_FLOAT
;
#else
#else
...
...
paddle/cuda/src/hl_cuda_device.cc
浏览文件 @
ccea3b02
...
@@ -203,8 +203,8 @@ inline pid_t gettid() {
...
@@ -203,8 +203,8 @@ inline pid_t gettid() {
#endif
#endif
pid_t
tid
=
syscall
(
__NR_gettid
);
pid_t
tid
=
syscall
(
__NR_gettid
);
#endif
#endif
CHECK_NE
(
tid
,
-
1
);
CHECK_NE
(
(
int
)
tid
,
-
1
);
return
tid
;
return
tid
;
}
}
void
hl_init
(
int
device
)
{
void
hl_init
(
int
device
)
{
...
@@ -355,7 +355,8 @@ void* hl_malloc_host(size_t size) {
...
@@ -355,7 +355,8 @@ void* hl_malloc_host(size_t size) {
void
*
dest_h
;
void
*
dest_h
;
CHECK
(
size
)
<<
__func__
<<
": the size for device memory is 0, please check."
;
CHECK
(
size
)
<<
__func__
<<
": the size for device memory is 0, please check."
;
CHECK_CUDA
(
dynload
::
cudaHostAlloc
((
void
**
)
&
dest_h
,
size
,
cudaHostAllocDefault
));
CHECK_CUDA
(
dynload
::
cudaHostAlloc
(
(
void
**
)
&
dest_h
,
size
,
cudaHostAllocDefault
));
return
dest_h
;
return
dest_h
;
}
}
...
@@ -364,7 +365,7 @@ void hl_free_mem_host(void *dest_h) {
...
@@ -364,7 +365,7 @@ void hl_free_mem_host(void *dest_h) {
CHECK_NOTNULL
(
dest_h
);
CHECK_NOTNULL
(
dest_h
);
cudaError_t
err
=
dynload
::
cudaFreeHost
(
dest_h
);
cudaError_t
err
=
dynload
::
cudaFreeHost
(
dest_h
);
CHECK
(
cudaSuccess
==
err
||
cudaErrorCudartUnloading
==
err
)
CHECK
(
cudaSuccess
==
err
||
cudaErrorCudartUnloading
==
err
)
<<
hl_get_device_error_string
();
<<
hl_get_device_error_string
();
}
}
...
@@ -502,7 +503,8 @@ int hl_get_cuda_version() {
...
@@ -502,7 +503,8 @@ int hl_get_cuda_version() {
return
g_cuda_lib_version
;
return
g_cuda_lib_version
;
}
}
void
hl_create_thread_resources
(
int
device
,
thread_device_resources
device_res
)
{
void
hl_create_thread_resources
(
int
device
,
thread_device_resources
device_res
)
{
CHECK_CUDA
(
dynload
::
cudaSetDevice
(
device
));
CHECK_CUDA
(
dynload
::
cudaSetDevice
(
device
));
/* create thread stream */
/* create thread stream */
...
...
paddle/cuda/src/hl_cudart_wrap.cc
浏览文件 @
ccea3b02
...
@@ -78,48 +78,38 @@ __host__ cudaError_t CUDARTAPI cudaLaunchKernel(const void *func,
...
@@ -78,48 +78,38 @@ __host__ cudaError_t CUDARTAPI cudaLaunchKernel(const void *func,
dim3
blockDim
,
dim3
blockDim
,
void
**
args
,
void
**
args
,
size_t
sharedMem
,
size_t
sharedMem
,
cudaStream_t
stream
)
cudaStream_t
stream
)
{
{
return
dynload
::
cudaLaunchKernel
(
func
,
gridDim
,
blockDim
,
return
dynload
::
cudaLaunchKernel
(
func
,
gridDim
,
blockDim
,
args
,
sharedMem
,
stream
);
args
,
sharedMem
,
stream
);
}
}
#endif
/* CUDART_VERSION >= 7000 */
#endif
/* CUDART_VERSION >= 7000 */
__host__
cudaError_t
CUDARTAPI
cudaLaunch
(
const
void
*
func
)
__host__
cudaError_t
CUDARTAPI
cudaLaunch
(
const
void
*
func
)
{
{
return
dynload
::
cudaLaunch
(
func
);
return
dynload
::
cudaLaunch
(
func
);
}
}
__host__
cudaError_t
CUDARTAPI
cudaSetupArgument
(
const
void
*
arg
,
__host__
cudaError_t
CUDARTAPI
cudaSetupArgument
(
const
void
*
arg
,
size_t
size
,
size_t
size
,
size_t
offset
)
size_t
offset
)
{
{
return
dynload
::
cudaSetupArgument
(
arg
,
size
,
offset
);
return
dynload
::
cudaSetupArgument
(
arg
,
size
,
offset
);
}
}
__host__
cudaError_t
CUDARTAPI
cudaConfigureCall
(
dim3
gridDim
,
__host__
cudaError_t
CUDARTAPI
cudaConfigureCall
(
dim3
gridDim
,
dim3
blockDim
,
dim3
blockDim
,
size_t
sharedMem
,
size_t
sharedMem
,
cudaStream_t
stream
)
cudaStream_t
stream
)
{
{
return
dynload
::
cudaConfigureCall
(
gridDim
,
blockDim
,
return
dynload
::
cudaConfigureCall
(
gridDim
,
blockDim
,
sharedMem
,
stream
);
sharedMem
,
stream
);
}
}
extern
"C"
{
extern
"C"
{
void
**
CUDARTAPI
__cudaRegisterFatBinary
(
void
**
CUDARTAPI
__cudaRegisterFatBinary
(
void
*
fatCubin
)
{
void
*
fatCubin
)
{
return
dynload
::
__cudaRegisterFatBinary
(
fatCubin
);
return
dynload
::
__cudaRegisterFatBinary
(
fatCubin
);
}
}
void
CUDARTAPI
__cudaUnregisterFatBinary
(
void
CUDARTAPI
__cudaUnregisterFatBinary
(
void
**
fatCubinHandle
)
{
void
**
fatCubinHandle
)
{
return
dynload
::
__cudaUnregisterFatBinary
(
fatCubinHandle
);
return
dynload
::
__cudaUnregisterFatBinary
(
fatCubinHandle
);
}
}
...
...
paddle/cuda/src/hl_dso_loader.cc
浏览文件 @
ccea3b02
...
@@ -19,17 +19,18 @@ limitations under the License. */
...
@@ -19,17 +19,18 @@ limitations under the License. */
P_DEFINE_string
(
cudnn_dir
,
""
,
P_DEFINE_string
(
cudnn_dir
,
""
,
"Specify path for loading libcudnn.so. For instance, "
"Specify path for loading libcudnn.so. For instance, "
"/usr/local/cudnn/lib64. If empty [default], dlopen
will search
"
"/usr/local/cudnn/lib64. If empty [default], dlopen "
"cudnn from LD_LIBRARY_PATH"
);
"
will search
cudnn from LD_LIBRARY_PATH"
);
P_DEFINE_string
(
cuda_dir
,
""
,
P_DEFINE_string
(
cuda_dir
,
""
,
"Specify path for loading cuda library, such as libcublas, "
"Specify path for loading cuda library, such as libcublas, "
"libcurand. For instance, /usr/local/cuda/lib64. "
"libcurand. For instance, /usr/local/cuda/lib64.
(Note:
"
"
(Note:
libcudart can not be specified by cuda_dir, since some "
"libcudart can not be specified by cuda_dir, since some "
"build-in function in cudart already ran before main entry). "
"build-in function in cudart already ran before main entry). "
"If
empty [default]
, dlopen will search cuda from LD_LIBRARY_PATH"
);
"If
default
, dlopen will search cuda from LD_LIBRARY_PATH"
);
static
inline
std
::
string
join
(
const
std
::
string
&
part1
,
const
std
::
string
&
part2
)
{
static
inline
std
::
string
join
(
const
std
::
string
&
part1
,
const
std
::
string
&
part2
)
{
// directory separator
// directory separator
const
char
sep
=
'/'
;
const
char
sep
=
'/'
;
...
@@ -49,10 +50,10 @@ static inline std::string join(const std::string& part1, const std::string& part
...
@@ -49,10 +50,10 @@ static inline std::string join(const std::string& part1, const std::string& part
static
inline
void
GetDsoHandleFromDefaultPath
(
static
inline
void
GetDsoHandleFromDefaultPath
(
std
::
string
&
dso_path
,
void
**
dso_handle
,
int
dynload_flags
)
{
std
::
string
&
dso_path
,
void
**
dso_handle
,
int
dynload_flags
)
{
VLOG
(
3
)
<<
"Try to find cuda library: "
<<
dso_path
VLOG
(
3
)
<<
"Try to find cuda library: "
<<
dso_path
<<
" from default system path."
;
<<
" from default system path."
;
// default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH
// default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH
*
dso_handle
=
dlopen
(
dso_path
.
c_str
(),
dynload_flags
);
*
dso_handle
=
dlopen
(
dso_path
.
c_str
(),
dynload_flags
);
// DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to
// DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to
// bring System Integrity Projection (SIP), if dso_handle
// bring System Integrity Projection (SIP), if dso_handle
// is null, search from default package path in Mac OS.
// is null, search from default package path in Mac OS.
...
@@ -62,13 +63,13 @@ static inline void GetDsoHandleFromDefaultPath(
...
@@ -62,13 +63,13 @@ static inline void GetDsoHandleFromDefaultPath(
*
dso_handle
=
dlopen
(
dso_path
.
c_str
(),
dynload_flags
);
*
dso_handle
=
dlopen
(
dso_path
.
c_str
(),
dynload_flags
);
if
(
nullptr
==
*
dso_handle
)
{
if
(
nullptr
==
*
dso_handle
)
{
if
(
dso_path
==
"libcudnn.dylib"
)
{
if
(
dso_path
==
"libcudnn.dylib"
)
{
LOG
(
FATAL
)
<<
"Note: [Recommend] copy cudnn into /usr/local/cuda/
\n
"
LOG
(
FATAL
)
<<
"Note: [Recommend] copy cudnn into /usr/local/cuda/
\n
"
// NOLINT
<<
"For instance, sudo tar -xzf cudnn-7.5-osx-x64-v5.0-ga.tgz -C "
<<
"For instance, sudo tar -xzf cudnn-7.5-osx-x64-v5.0-ga.tgz -C "
// NOLINT
<<
"/usr/local
\n
sudo chmod a+r /usr/local/cuda/include/cudnn.h "
<<
"/usr/local
\n
sudo chmod a+r /usr/local/cuda/include/cudnn.h "
// NOLINT
<<
"/usr/local/cuda/lib/libcudnn*"
;
<<
"/usr/local/cuda/lib/libcudnn*"
;
}
}
}
}
}
}
#endif
#endif
}
}
...
@@ -96,19 +97,19 @@ static inline void GetDsoHandleFromSearchPath(
...
@@ -96,19 +97,19 @@ static inline void GetDsoHandleFromSearchPath(
CHECK
(
nullptr
!=
*
dso_handle
)
CHECK
(
nullptr
!=
*
dso_handle
)
<<
"Failed to find cuda library: "
<<
dlPath
<<
std
::
endl
<<
"Failed to find cuda library: "
<<
dlPath
<<
std
::
endl
<<
"Please specify its path correctly using one of the following
ideas:
\n
"
<<
"Please specify its path correctly using one of the following
ways:
\n
"
// NOLINT
<<
"
Idea
1. set cuda and cudnn lib path at runtime. "
<<
"
Method
1. set cuda and cudnn lib path at runtime. "
<<
"http://www.paddlepaddle.org/doc/ui/cmd_argument/argument_outline.html
\n
"
<<
"http://www.paddlepaddle.org/doc/ui/cmd_argument/argument_outline.html
\n
"
// NOLINT
<<
"For instance, issue command: paddle train --use_gpu=1 "
<<
"For instance, issue command: paddle train --use_gpu=1 "
<<
"--cuda_dir=/usr/local/cud
nn/lib --cudnn_dir=/usr/local/cudnn/lib ...
\n
"
<<
"--cuda_dir=/usr/local/cud
a/lib64 --cudnn_dir=/usr/local/cudnn/lib ...
\n
"
// NOLINT
<<
"
Idea
2. set environment variable LD_LIBRARY_PATH on Linux or "
<<
"
Method
2. set environment variable LD_LIBRARY_PATH on Linux or "
<<
"DYLD_LIBRARY_PATH on Mac OS.
\n
"
<<
"DYLD_LIBRARY_PATH on Mac OS.
\n
"
<<
"For instance, issue command: export LD_LIBRARY_PATH=...
\n
"
<<
"For instance, issue command: export LD_LIBRARY_PATH=...
\n
"
<<
"Note: After Mac OS 10.11, using the DYLD_LIBRARY_PATH is impossible "
<<
"Note: After Mac OS 10.11, using the DYLD_LIBRARY_PATH is impossible "
<<
"unless System Integrity Protection (SIP) is disabled. However,
@Idea 1"
<<
"unless System Integrity Protection (SIP) is disabled. However,
method 1 "
// NOLINT
<<
"always work well."
;
<<
"always work well."
;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录