Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
20aac5bb
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
694
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
20aac5bb
编写于
11月 20, 2016
作者:
L
liaogang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add style check for *.cc files in cuda directory
上级
77ddce0f
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
112 addition
and
132 deletion
+112
-132
paddle/cuda/CMakeLists.txt
paddle/cuda/CMakeLists.txt
+5
-2
paddle/cuda/src/hl_cuda_cublas.cc
paddle/cuda/src/hl_cuda_cublas.cc
+7
-8
paddle/cuda/src/hl_cuda_cudnn.cc
paddle/cuda/src/hl_cuda_cudnn.cc
+64
-79
paddle/cuda/src/hl_cuda_device.cc
paddle/cuda/src/hl_cuda_device.cc
+7
-5
paddle/cuda/src/hl_cudart_wrap.cc
paddle/cuda/src/hl_cudart_wrap.cc
+8
-18
paddle/cuda/src/hl_dso_loader.cc
paddle/cuda/src/hl_dso_loader.cc
+21
-20
未找到文件。
paddle/cuda/CMakeLists.txt
浏览文件 @
20aac5bb
...
@@ -81,5 +81,8 @@ else()
...
@@ -81,5 +81,8 @@ else()
add_library
(
paddle_cuda
${
CUDA_SOURCES
}
)
add_library
(
paddle_cuda
${
CUDA_SOURCES
}
)
endif
()
endif
()
add_style_check_target
(
paddle_cuda
${
CUDA_SOURCES
}
)
add_style_check_target
(
paddle_cuda
add_style_check_target
(
paddle_cuda
${
CUDA_HEADERS
}
)
${
CUDA_SOURCES
}
${
CUDA_HEADERS
}
${
CUDA_DSO_SOURCES
}
${
CUDA_CXX_WITH_GPU_SOURCES
}
)
paddle/cuda/src/hl_cuda_cublas.cc
浏览文件 @
20aac5bb
...
@@ -104,7 +104,7 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP)
...
@@ -104,7 +104,7 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP)
#endif
#endif
const
char
*
hl_cublas_get_error_string
(
cublasStatus_t
status
)
{
const
char
*
hl_cublas_get_error_string
(
cublasStatus_t
status
)
{
switch
(
status
)
{
switch
(
status
)
{
case
CUBLAS_STATUS_NOT_INITIALIZED
:
case
CUBLAS_STATUS_NOT_INITIALIZED
:
return
"[cublas status]: not initialized"
;
return
"[cublas status]: not initialized"
;
case
CUBLAS_STATUS_ALLOC_FAILED
:
case
CUBLAS_STATUS_ALLOC_FAILED
:
...
@@ -181,7 +181,7 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
...
@@ -181,7 +181,7 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
real
**
inout_d
=
(
real
**
)
hl_malloc_device
(
sizeof
(
real
*
));
real
**
inout_d
=
(
real
**
)
hl_malloc_device
(
sizeof
(
real
*
));
hl_memcpy
(
inout_d
,
inout_h
,
sizeof
(
real
*
));
hl_memcpy
(
inout_d
,
inout_h
,
sizeof
(
real
*
));
int
*
pivot_d
=
(
int
*
)
hl_malloc_device
(
dimN
*
sizeof
(
int
));
int
*
pivot_d
=
(
int
*
)
hl_malloc_device
(
dimN
*
sizeof
(
int
));
int
*
info_d
=
(
int
*
)
t_resource
.
gpu_mem
;
int
*
info_d
=
(
int
*
)
t_resource
.
gpu_mem
;
/* Note: cublasSgetrfBatched is used to calculate a number of
/* Note: cublasSgetrfBatched is used to calculate a number of
...
@@ -189,10 +189,9 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
...
@@ -189,10 +189,9 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
the API for better performance.
the API for better performance.
*/
*/
CHECK_CUBLAS
(
CUBLAS_GETRF
(
t_resource
.
handle
,
CHECK_CUBLAS
(
CUBLAS_GETRF
(
t_resource
.
handle
,
dimN
,
inout_d
,
lda
,
pivot_d
,
dimN
,
inout_d
,
lda
,
pivot_d
,
info_d
,
1
));
info_d
,
1
));
int
info_h
;
int
info_h
;
hl_memcpy
(
&
info_h
,
info_d
,
sizeof
(
int
));
hl_memcpy
(
&
info_h
,
info_d
,
sizeof
(
int
));
if
(
info_h
!=
0
)
{
if
(
info_h
!=
0
)
{
LOG
(
FATAL
)
<<
"Factorization of matrix failed: matrix may be singular.
\n
"
;
LOG
(
FATAL
)
<<
"Factorization of matrix failed: matrix may be singular.
\n
"
;
...
@@ -204,8 +203,8 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
...
@@ -204,8 +203,8 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
hl_memcpy
(
out_d
,
out_h
,
sizeof
(
real
*
));
hl_memcpy
(
out_d
,
out_h
,
sizeof
(
real
*
));
CHECK_CUBLAS
(
CUBLAS_GETRI
(
t_resource
.
handle
,
CHECK_CUBLAS
(
CUBLAS_GETRI
(
t_resource
.
handle
,
dimN
,
(
const
real
**
)
inout_d
,
lda
,
pivot_d
,
dimN
,
(
const
real
**
)
inout_d
,
lda
,
pivot_d
,
out_d
,
ldc
,
info_d
,
1
));
out_d
,
ldc
,
info_d
,
1
));
hl_memcpy
(
&
info_h
,
info_d
,
sizeof
(
int
));
hl_memcpy
(
&
info_h
,
info_d
,
sizeof
(
int
));
if
(
info_h
!=
0
)
{
if
(
info_h
!=
0
)
{
...
@@ -215,7 +214,7 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
...
@@ -215,7 +214,7 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
hl_free_mem_device
(
inout_d
);
hl_free_mem_device
(
inout_d
);
hl_free_mem_device
(
pivot_d
);
hl_free_mem_device
(
pivot_d
);
hl_free_mem_device
(
out_d
);
hl_free_mem_device
(
out_d
);
CHECK_SYNC
(
"hl_matrix_inverse failed"
);
CHECK_SYNC
(
"hl_matrix_inverse failed"
);
}
}
...
...
paddle/cuda/src/hl_cuda_cudnn.cc
浏览文件 @
20aac5bb
...
@@ -159,13 +159,11 @@ CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP)
...
@@ -159,13 +159,11 @@ CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP)
bool
g_is_libcudnn_init
=
false
;
bool
g_is_libcudnn_init
=
false
;
int
g_cudnn_lib_version
=
0
;
int
g_cudnn_lib_version
=
0
;
void
hl_cudnn_desc_init
(
cudnnTensorDescriptor_t
*
cudnn_desc
)
void
hl_cudnn_desc_init
(
cudnnTensorDescriptor_t
*
cudnn_desc
)
{
{
CHECK_CUDNN
(
dynload
::
cudnnCreateTensorDescriptor
(
cudnn_desc
));
CHECK_CUDNN
(
dynload
::
cudnnCreateTensorDescriptor
(
cudnn_desc
));
}
}
void
hl_cudnn_init
(
cudnnHandle_t
*
cudnn_handle
,
cudaStream_t
stream
)
void
hl_cudnn_init
(
cudnnHandle_t
*
cudnn_handle
,
cudaStream_t
stream
)
{
{
size_t
cudnn_dso_ver
=
dynload
::
cudnnGetVersion
();
size_t
cudnn_dso_ver
=
dynload
::
cudnnGetVersion
();
size_t
cudnn_dso_major
=
cudnn_dso_ver
/
1000
;
size_t
cudnn_dso_major
=
cudnn_dso_ver
/
1000
;
size_t
cudnn_cuh_major
=
CUDNN_VERSION
/
1000
;
size_t
cudnn_cuh_major
=
CUDNN_VERSION
/
1000
;
...
@@ -212,13 +210,18 @@ void hl_conv_workspace(hl_tensor_descriptor input,
...
@@ -212,13 +210,18 @@ void hl_conv_workspace(hl_tensor_descriptor input,
CHECK_NOTNULL
(
conv
);
CHECK_NOTNULL
(
conv
);
// Specify workspace limit directly
// Specify workspace limit directly
size_t
memoryLimitBytes
=
(
1LL
<<
20
)
*
FLAGS_cudnn_conv_workspace_limit_in_mb
;
size_t
memoryLimitBytes
=
(
1LL
<<
20
)
*
FLAGS_cudnn_conv_workspace_limit_in_mb
;
// cudnn convolution forward configuration
// cudnn convolution forward configuration
cudnnTensorDescriptor_t
fwd_src_desc
=
GET_TENSOR_DESCRIPTOR
(
input
);
cudnnTensorDescriptor_t
fwd_src_desc
=
cudnnTensorDescriptor_t
fwd_dest_desc
=
GET_TENSOR_DESCRIPTOR
(
output
);
GET_TENSOR_DESCRIPTOR
(
input
);
cudnnFilterDescriptor_t
fwd_filter_desc
=
GET_FILTER_DESCRIPTOR
(
filter
);
cudnnTensorDescriptor_t
fwd_dest_desc
=
cudnnConvolutionDescriptor_t
fwd_conv_desc
=
GET_CONVOLUTION_DESCRIPTOR
(
conv
);
GET_TENSOR_DESCRIPTOR
(
output
);
cudnnFilterDescriptor_t
fwd_filter_desc
=
GET_FILTER_DESCRIPTOR
(
filter
);
cudnnConvolutionDescriptor_t
fwd_conv_desc
=
GET_CONVOLUTION_DESCRIPTOR
(
conv
);
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionForwardAlgorithm
(
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionForwardAlgorithm
(
t_resource
.
cudnn_handle
,
t_resource
.
cudnn_handle
,
...
@@ -250,23 +253,23 @@ void hl_conv_workspace(hl_tensor_descriptor input,
...
@@ -250,23 +253,23 @@ void hl_conv_workspace(hl_tensor_descriptor input,
GET_CONVOLUTION_DESCRIPTOR
(
conv
);
GET_CONVOLUTION_DESCRIPTOR
(
conv
);
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardDataAlgorithm
(
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardDataAlgorithm
(
t_resource
.
cudnn_handle
,
t_resource
.
cudnn_handle
,
bwd_data_filter_desc
,
bwd_data_filter_desc
,
bwd_data_diff_desc
,
bwd_data_diff_desc
,
bwd_data_conv_desc
,
bwd_data_conv_desc
,
bwd_data_grad_desc
,
bwd_data_grad_desc
,
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
,
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
,
memoryLimitBytes
,
memoryLimitBytes
,
reinterpret_cast
<
cudnnConvolutionBwdDataAlgo_t
*>
(
convBwdDataAlgo
)));
reinterpret_cast
<
cudnnConvolutionBwdDataAlgo_t
*>
(
convBwdDataAlgo
)));
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardDataWorkspaceSize
(
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardDataWorkspaceSize
(
t_resource
.
cudnn_handle
,
t_resource
.
cudnn_handle
,
bwd_data_filter_desc
,
bwd_data_filter_desc
,
bwd_data_diff_desc
,
bwd_data_diff_desc
,
bwd_data_conv_desc
,
bwd_data_conv_desc
,
bwd_data_grad_desc
,
bwd_data_grad_desc
,
static_cast
<
cudnnConvolutionBwdDataAlgo_t
>
(
*
convBwdDataAlgo
),
static_cast
<
cudnnConvolutionBwdDataAlgo_t
>
(
*
convBwdDataAlgo
),
bwdDataLimitBytes
));
bwdDataLimitBytes
));
// cudnn convolution backward filter configuration
// cudnn convolution backward filter configuration
cudnnTensorDescriptor_t
bwd_filter_src_desc
=
cudnnTensorDescriptor_t
bwd_filter_src_desc
=
...
@@ -279,21 +282,21 @@ void hl_conv_workspace(hl_tensor_descriptor input,
...
@@ -279,21 +282,21 @@ void hl_conv_workspace(hl_tensor_descriptor input,
GET_FILTER_DESCRIPTOR
(
filter
);
GET_FILTER_DESCRIPTOR
(
filter
);
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardFilterAlgorithm
(
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardFilterAlgorithm
(
t_resource
.
cudnn_handle
,
t_resource
.
cudnn_handle
,
bwd_filter_src_desc
,
bwd_filter_src_desc
,
bwd_filter_diff_desc
,
bwd_filter_diff_desc
,
bwd_filter_conv_desc
,
bwd_filter_conv_desc
,
bwd_filter_grad_desc
,
bwd_filter_grad_desc
,
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
,
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
,
memoryLimitBytes
,
memoryLimitBytes
,
reinterpret_cast
<
cudnnConvolutionBwdFilterAlgo_t
*>
(
convBwdFilterAlgo
)));
reinterpret_cast
<
cudnnConvolutionBwdFilterAlgo_t
*>
(
convBwdFilterAlgo
)));
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardFilterWorkspaceSize
(
CHECK_CUDNN
(
dynload
::
cudnnGetConvolutionBackwardFilterWorkspaceSize
(
t_resource
.
cudnn_handle
,
bwd_filter_src_desc
,
t_resource
.
cudnn_handle
,
bwd_filter_src_desc
,
bwd_filter_diff_desc
,
bwd_filter_conv_desc
,
bwd_filter_diff_desc
,
bwd_filter_conv_desc
,
bwd_filter_grad_desc
,
bwd_filter_grad_desc
,
static_cast
<
cudnnConvolutionBwdFilterAlgo_t
>
(
*
convBwdFilterAlgo
),
static_cast
<
cudnnConvolutionBwdFilterAlgo_t
>
(
*
convBwdFilterAlgo
),
bwdFilterLimitBytes
));
bwdFilterLimitBytes
));
#endif
#endif
}
}
...
@@ -302,8 +305,7 @@ void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc,
...
@@ -302,8 +305,7 @@ void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc,
int
batch_size
,
int
batch_size
,
int
feature_maps
,
int
feature_maps
,
int
height
,
int
height
,
int
width
)
int
width
)
{
{
CHECK_NOTNULL
(
image_desc
);
CHECK_NOTNULL
(
image_desc
);
cudnn_tensor_descriptor
hl_desc
=
cudnn_tensor_descriptor
hl_desc
=
...
@@ -359,8 +361,7 @@ void hl_tensor_reshape(hl_tensor_descriptor image_desc,
...
@@ -359,8 +361,7 @@ void hl_tensor_reshape(hl_tensor_descriptor image_desc,
int
batch_size
,
int
batch_size
,
int
feature_maps
,
int
feature_maps
,
int
height
,
int
height
,
int
width
)
int
width
)
{
{
const
int
stride_w
=
1
;
const
int
stride_w
=
1
;
const
int
stride_h
=
width
*
stride_w
;
const
int
stride_h
=
width
*
stride_w
;
const
int
stride_c
=
height
*
stride_h
;
const
int
stride_c
=
height
*
stride_h
;
...
@@ -384,8 +385,7 @@ void hl_tensor_reshape(hl_tensor_descriptor image_desc,
...
@@ -384,8 +385,7 @@ void hl_tensor_reshape(hl_tensor_descriptor image_desc,
int
nStride
,
int
nStride
,
int
cStride
,
int
cStride
,
int
hStride
,
int
hStride
,
int
wStride
)
int
wStride
)
{
{
CHECK_NOTNULL
(
image_desc
);
CHECK_NOTNULL
(
image_desc
);
cudnn_tensor_descriptor
hl_desc
=
(
cudnn_tensor_descriptor
)
image_desc
;
cudnn_tensor_descriptor
hl_desc
=
(
cudnn_tensor_descriptor
)
image_desc
;
...
@@ -408,8 +408,7 @@ void hl_tensor_reshape(hl_tensor_descriptor image_desc,
...
@@ -408,8 +408,7 @@ void hl_tensor_reshape(hl_tensor_descriptor image_desc,
hl_desc
->
width
=
width
;
hl_desc
->
width
=
width
;
}
}
void
hl_destroy_tensor_descriptor
(
hl_tensor_descriptor
image_desc
)
void
hl_destroy_tensor_descriptor
(
hl_tensor_descriptor
image_desc
)
{
{
CHECK_NOTNULL
(
image_desc
);
CHECK_NOTNULL
(
image_desc
);
cudnn_tensor_descriptor
hl_desc
=
(
cudnn_tensor_descriptor
)
image_desc
;
cudnn_tensor_descriptor
hl_desc
=
(
cudnn_tensor_descriptor
)
image_desc
;
...
@@ -430,11 +429,9 @@ void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc,
...
@@ -430,11 +429,9 @@ void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc,
int
height_padding
,
int
height_padding
,
int
width_padding
,
int
width_padding
,
int
stride_height
,
int
stride_height
,
int
stride_width
)
int
stride_width
)
{
{
cudnnPoolingMode_t
cudnn_mode
;
cudnnPoolingMode_t
cudnn_mode
;
switch
(
mode
)
switch
(
mode
)
{
{
case
HL_POOLING_MAX
:
case
HL_POOLING_MAX
:
cudnn_mode
=
CUDNN_POOLING_MAX
;
cudnn_mode
=
CUDNN_POOLING_MAX
;
break
;
break
;
...
@@ -478,13 +475,13 @@ void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc,
...
@@ -478,13 +475,13 @@ void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc,
*
pooling_desc
=
(
hl_pooling_descriptor
)
hl_pooling_desc
;
*
pooling_desc
=
(
hl_pooling_descriptor
)
hl_pooling_desc
;
}
}
void
hl_destroy_pooling_descriptor
(
hl_pooling_descriptor
pooling_desc
)
void
hl_destroy_pooling_descriptor
(
hl_pooling_descriptor
pooling_desc
)
{
{
CHECK_NOTNULL
(
pooling_desc
);
CHECK_NOTNULL
(
pooling_desc
);
cudnn_pooling_descriptor
hl_pooling
=
(
cudnn_pooling_descriptor
)
pooling_desc
;
cudnn_pooling_descriptor
hl_pooling
=
CHECK_NOTNULL
(
hl_pooling
->
desc
)
;
(
cudnn_pooling_descriptor
)
pooling_desc
;
CHECK_NOTNULL
(
hl_pooling
->
desc
);
CHECK_CUDNN
(
dynload
::
cudnnDestroyPoolingDescriptor
(
hl_pooling
->
desc
));
CHECK_CUDNN
(
dynload
::
cudnnDestroyPoolingDescriptor
(
hl_pooling
->
desc
));
hl_pooling
->
desc
=
NULL
;
hl_pooling
->
desc
=
NULL
;
...
@@ -496,8 +493,7 @@ void hl_pooling_forward(hl_tensor_descriptor input,
...
@@ -496,8 +493,7 @@ void hl_pooling_forward(hl_tensor_descriptor input,
real
*
input_image
,
real
*
input_image
,
hl_tensor_descriptor
output
,
hl_tensor_descriptor
output
,
real
*
output_image
,
real
*
output_image
,
hl_pooling_descriptor
pooling
)
hl_pooling_descriptor
pooling
)
{
{
cudnnPoolingDescriptor_t
pooling_desc
;
cudnnPoolingDescriptor_t
pooling_desc
;
cudnnTensorDescriptor_t
input_desc
;
cudnnTensorDescriptor_t
input_desc
;
cudnnTensorDescriptor_t
output_desc
;
cudnnTensorDescriptor_t
output_desc
;
...
@@ -531,8 +527,7 @@ void hl_pooling_backward(hl_tensor_descriptor input,
...
@@ -531,8 +527,7 @@ void hl_pooling_backward(hl_tensor_descriptor input,
hl_tensor_descriptor
output
,
hl_tensor_descriptor
output
,
real
*
output_image
,
real
*
output_image
,
real
*
output_image_grad
,
real
*
output_image_grad
,
hl_pooling_descriptor
pooling
)
hl_pooling_descriptor
pooling
)
{
{
cudnnPoolingDescriptor_t
pooling_desc
;
cudnnPoolingDescriptor_t
pooling_desc
;
cudnnTensorDescriptor_t
input_desc
;
cudnnTensorDescriptor_t
input_desc
;
cudnnTensorDescriptor_t
output_desc
;
cudnnTensorDescriptor_t
output_desc
;
...
@@ -571,8 +566,7 @@ void hl_create_filter_descriptor(hl_filter_descriptor* filter,
...
@@ -571,8 +566,7 @@ void hl_create_filter_descriptor(hl_filter_descriptor* filter,
int
input_feature_maps
,
int
input_feature_maps
,
int
output_feature_maps
,
int
output_feature_maps
,
int
height
,
int
height
,
int
width
)
int
width
)
{
{
CHECK_NOTNULL
(
filter
);
CHECK_NOTNULL
(
filter
);
cudnn_filter_descriptor
hl_filter
=
cudnn_filter_descriptor
hl_filter
=
...
@@ -607,8 +601,7 @@ void hl_create_filter_descriptor(hl_filter_descriptor* filter,
...
@@ -607,8 +601,7 @@ void hl_create_filter_descriptor(hl_filter_descriptor* filter,
}
}
void
hl_destroy_filter_descriptor
(
hl_filter_descriptor
filter
)
void
hl_destroy_filter_descriptor
(
hl_filter_descriptor
filter
)
{
{
CHECK_NOTNULL
(
filter
);
CHECK_NOTNULL
(
filter
);
cudnn_filter_descriptor
hl_filter
=
(
cudnn_filter_descriptor
)
filter
;
cudnn_filter_descriptor
hl_filter
=
(
cudnn_filter_descriptor
)
filter
;
...
@@ -627,14 +620,13 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
...
@@ -627,14 +620,13 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
int
padding_height
,
int
padding_height
,
int
padding_width
,
int
padding_width
,
int
stride_height
,
int
stride_height
,
int
stride_width
)
int
stride_width
)
{
{
CHECK_NOTNULL
(
conv
);
CHECK_NOTNULL
(
conv
);
cudnn_convolution_descriptor
hl_conv
=
cudnn_convolution_descriptor
hl_conv
=
(
cudnn_convolution_descriptor
)
(
cudnn_convolution_descriptor
)
malloc
(
sizeof
(
_cudnn_convolution_descriptor
));
malloc
(
sizeof
(
_cudnn_convolution_descriptor
));
CHECK_NOTNULL
(
hl_conv
);
CHECK_NOTNULL
(
hl_conv
);
CHECK_CUDNN
(
dynload
::
cudnnCreateConvolutionDescriptor
(
&
hl_conv
->
desc
));
CHECK_CUDNN
(
dynload
::
cudnnCreateConvolutionDescriptor
(
&
hl_conv
->
desc
));
cudnnConvolutionMode_t
mode
=
CUDNN_CROSS_CORRELATION
;
cudnnConvolutionMode_t
mode
=
CUDNN_CROSS_CORRELATION
;
...
@@ -667,8 +659,7 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
...
@@ -667,8 +659,7 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
int
padding_height
,
int
padding_height
,
int
padding_width
,
int
padding_width
,
int
stride_height
,
int
stride_height
,
int
stride_width
)
int
stride_width
)
{
{
CHECK_NOTNULL
(
conv
);
CHECK_NOTNULL
(
conv
);
CHECK_NOTNULL
(
image
);
CHECK_NOTNULL
(
image
);
CHECK_NOTNULL
(
filter
);
CHECK_NOTNULL
(
filter
);
...
@@ -697,8 +688,7 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
...
@@ -697,8 +688,7 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
hl_conv
->
mode
=
mode
;
hl_conv
->
mode
=
mode
;
}
}
void
hl_destroy_convolution_descriptor
(
hl_convolution_descriptor
conv
)
void
hl_destroy_convolution_descriptor
(
hl_convolution_descriptor
conv
)
{
{
CHECK_NOTNULL
(
conv
);
CHECK_NOTNULL
(
conv
);
cudnn_convolution_descriptor
hl_conv
=
(
cudnn_convolution_descriptor
)
conv
;
cudnn_convolution_descriptor
hl_conv
=
(
cudnn_convolution_descriptor
)
conv
;
...
@@ -753,8 +743,7 @@ void hl_convolution_forward(hl_tensor_descriptor input,
...
@@ -753,8 +743,7 @@ void hl_convolution_forward(hl_tensor_descriptor input,
void
hl_convolution_forward_add_bias
(
hl_tensor_descriptor
bias
,
void
hl_convolution_forward_add_bias
(
hl_tensor_descriptor
bias
,
real
*
bias_data
,
real
*
bias_data
,
hl_tensor_descriptor
output
,
hl_tensor_descriptor
output
,
real
*
output_data
)
real
*
output_data
)
{
{
CHECK_NOTNULL
(
bias
);
CHECK_NOTNULL
(
bias
);
CHECK_NOTNULL
(
output
);
CHECK_NOTNULL
(
output
);
CHECK_NOTNULL
(
bias_data
);
CHECK_NOTNULL
(
bias_data
);
...
@@ -782,8 +771,7 @@ void hl_convolution_forward_add_bias(hl_tensor_descriptor bias,
...
@@ -782,8 +771,7 @@ void hl_convolution_forward_add_bias(hl_tensor_descriptor bias,
void
hl_convolution_backward_bias
(
hl_tensor_descriptor
bias
,
void
hl_convolution_backward_bias
(
hl_tensor_descriptor
bias
,
real
*
bias_grad_data
,
real
*
bias_grad_data
,
hl_tensor_descriptor
output
,
hl_tensor_descriptor
output
,
real
*
output_grad_data
)
real
*
output_grad_data
)
{
{
CHECK_NOTNULL
(
bias
);
CHECK_NOTNULL
(
bias
);
CHECK_NOTNULL
(
output
);
CHECK_NOTNULL
(
output
);
CHECK_NOTNULL
(
bias_grad_data
);
CHECK_NOTNULL
(
bias_grad_data
);
...
@@ -814,7 +802,6 @@ void hl_convolution_backward_filter(hl_tensor_descriptor input,
...
@@ -814,7 +802,6 @@ void hl_convolution_backward_filter(hl_tensor_descriptor input,
void
*
gpuWorkSpace
,
void
*
gpuWorkSpace
,
size_t
sizeInBytes
,
size_t
sizeInBytes
,
int
convBwdFilterAlgo
)
{
int
convBwdFilterAlgo
)
{
CHECK_NOTNULL
(
input
);
CHECK_NOTNULL
(
input
);
CHECK_NOTNULL
(
output
);
CHECK_NOTNULL
(
output
);
CHECK_NOTNULL
(
filter
);
CHECK_NOTNULL
(
filter
);
...
@@ -889,8 +876,7 @@ void hl_convolution_backward_data(hl_tensor_descriptor input,
...
@@ -889,8 +876,7 @@ void hl_convolution_backward_data(hl_tensor_descriptor input,
void
hl_softmax_forward
(
real
*
input
,
void
hl_softmax_forward
(
real
*
input
,
real
*
output
,
real
*
output
,
int
height
,
int
height
,
int
width
)
int
width
)
{
{
#ifndef PADDLE_TYPE_DOUBLE
#ifndef PADDLE_TYPE_DOUBLE
cudnnDataType_t
data_type
=
CUDNN_DATA_FLOAT
;
cudnnDataType_t
data_type
=
CUDNN_DATA_FLOAT
;
#else
#else
...
@@ -923,8 +909,7 @@ void hl_softmax_forward(real *input,
...
@@ -923,8 +909,7 @@ void hl_softmax_forward(real *input,
void
hl_softmax_backward
(
real
*
output_value
,
void
hl_softmax_backward
(
real
*
output_value
,
real
*
output_grad
,
real
*
output_grad
,
int
height
,
int
height
,
int
width
)
int
width
)
{
{
#ifndef PADDLE_TYPE_DOUBLE
#ifndef PADDLE_TYPE_DOUBLE
cudnnDataType_t
data_type
=
CUDNN_DATA_FLOAT
;
cudnnDataType_t
data_type
=
CUDNN_DATA_FLOAT
;
#else
#else
...
...
paddle/cuda/src/hl_cuda_device.cc
浏览文件 @
20aac5bb
...
@@ -203,8 +203,8 @@ inline pid_t gettid() {
...
@@ -203,8 +203,8 @@ inline pid_t gettid() {
#endif
#endif
pid_t
tid
=
syscall
(
__NR_gettid
);
pid_t
tid
=
syscall
(
__NR_gettid
);
#endif
#endif
CHECK_NE
(
tid
,
-
1
);
CHECK_NE
(
(
int
)
tid
,
-
1
);
return
tid
;
return
tid
;
}
}
void
hl_init
(
int
device
)
{
void
hl_init
(
int
device
)
{
...
@@ -355,7 +355,8 @@ void* hl_malloc_host(size_t size) {
...
@@ -355,7 +355,8 @@ void* hl_malloc_host(size_t size) {
void
*
dest_h
;
void
*
dest_h
;
CHECK
(
size
)
<<
__func__
<<
": the size for device memory is 0, please check."
;
CHECK
(
size
)
<<
__func__
<<
": the size for device memory is 0, please check."
;
CHECK_CUDA
(
dynload
::
cudaHostAlloc
((
void
**
)
&
dest_h
,
size
,
cudaHostAllocDefault
));
CHECK_CUDA
(
dynload
::
cudaHostAlloc
(
(
void
**
)
&
dest_h
,
size
,
cudaHostAllocDefault
));
return
dest_h
;
return
dest_h
;
}
}
...
@@ -364,7 +365,7 @@ void hl_free_mem_host(void *dest_h) {
...
@@ -364,7 +365,7 @@ void hl_free_mem_host(void *dest_h) {
CHECK_NOTNULL
(
dest_h
);
CHECK_NOTNULL
(
dest_h
);
cudaError_t
err
=
dynload
::
cudaFreeHost
(
dest_h
);
cudaError_t
err
=
dynload
::
cudaFreeHost
(
dest_h
);
CHECK
(
cudaSuccess
==
err
||
cudaErrorCudartUnloading
==
err
)
CHECK
(
cudaSuccess
==
err
||
cudaErrorCudartUnloading
==
err
)
<<
hl_get_device_error_string
();
<<
hl_get_device_error_string
();
}
}
...
@@ -502,7 +503,8 @@ int hl_get_cuda_version() {
...
@@ -502,7 +503,8 @@ int hl_get_cuda_version() {
return
g_cuda_lib_version
;
return
g_cuda_lib_version
;
}
}
void
hl_create_thread_resources
(
int
device
,
thread_device_resources
device_res
)
{
void
hl_create_thread_resources
(
int
device
,
thread_device_resources
device_res
)
{
CHECK_CUDA
(
dynload
::
cudaSetDevice
(
device
));
CHECK_CUDA
(
dynload
::
cudaSetDevice
(
device
));
/* create thread stream */
/* create thread stream */
...
...
paddle/cuda/src/hl_cudart_wrap.cc
浏览文件 @
20aac5bb
...
@@ -78,48 +78,38 @@ __host__ cudaError_t CUDARTAPI cudaLaunchKernel(const void *func,
...
@@ -78,48 +78,38 @@ __host__ cudaError_t CUDARTAPI cudaLaunchKernel(const void *func,
dim3
blockDim
,
dim3
blockDim
,
void
**
args
,
void
**
args
,
size_t
sharedMem
,
size_t
sharedMem
,
cudaStream_t
stream
)
cudaStream_t
stream
)
{
{
return
dynload
::
cudaLaunchKernel
(
func
,
gridDim
,
blockDim
,
return
dynload
::
cudaLaunchKernel
(
func
,
gridDim
,
blockDim
,
args
,
sharedMem
,
stream
);
args
,
sharedMem
,
stream
);
}
}
#endif
/* CUDART_VERSION >= 7000 */
#endif
/* CUDART_VERSION >= 7000 */
__host__
cudaError_t
CUDARTAPI
cudaLaunch
(
const
void
*
func
)
__host__
cudaError_t
CUDARTAPI
cudaLaunch
(
const
void
*
func
)
{
{
return
dynload
::
cudaLaunch
(
func
);
return
dynload
::
cudaLaunch
(
func
);
}
}
__host__
cudaError_t
CUDARTAPI
cudaSetupArgument
(
const
void
*
arg
,
__host__
cudaError_t
CUDARTAPI
cudaSetupArgument
(
const
void
*
arg
,
size_t
size
,
size_t
size
,
size_t
offset
)
size_t
offset
)
{
{
return
dynload
::
cudaSetupArgument
(
arg
,
size
,
offset
);
return
dynload
::
cudaSetupArgument
(
arg
,
size
,
offset
);
}
}
__host__
cudaError_t
CUDARTAPI
cudaConfigureCall
(
dim3
gridDim
,
__host__
cudaError_t
CUDARTAPI
cudaConfigureCall
(
dim3
gridDim
,
dim3
blockDim
,
dim3
blockDim
,
size_t
sharedMem
,
size_t
sharedMem
,
cudaStream_t
stream
)
cudaStream_t
stream
)
{
{
return
dynload
::
cudaConfigureCall
(
gridDim
,
blockDim
,
return
dynload
::
cudaConfigureCall
(
gridDim
,
blockDim
,
sharedMem
,
stream
);
sharedMem
,
stream
);
}
}
extern
"C"
{
extern
"C"
{
void
**
CUDARTAPI
__cudaRegisterFatBinary
(
void
**
CUDARTAPI
__cudaRegisterFatBinary
(
void
*
fatCubin
)
{
void
*
fatCubin
)
{
return
dynload
::
__cudaRegisterFatBinary
(
fatCubin
);
return
dynload
::
__cudaRegisterFatBinary
(
fatCubin
);
}
}
void
CUDARTAPI
__cudaUnregisterFatBinary
(
void
CUDARTAPI
__cudaUnregisterFatBinary
(
void
**
fatCubinHandle
)
{
void
**
fatCubinHandle
)
{
return
dynload
::
__cudaUnregisterFatBinary
(
fatCubinHandle
);
return
dynload
::
__cudaUnregisterFatBinary
(
fatCubinHandle
);
}
}
...
...
paddle/cuda/src/hl_dso_loader.cc
浏览文件 @
20aac5bb
...
@@ -19,17 +19,18 @@ limitations under the License. */
...
@@ -19,17 +19,18 @@ limitations under the License. */
P_DEFINE_string
(
cudnn_dir
,
""
,
P_DEFINE_string
(
cudnn_dir
,
""
,
"Specify path for loading libcudnn.so. For instance, "
"Specify path for loading libcudnn.so. For instance, "
"/usr/local/cudnn/lib64. If empty [default], dlopen
will search
"
"/usr/local/cudnn/lib64. If empty [default], dlopen "
"cudnn from LD_LIBRARY_PATH"
);
"
will search
cudnn from LD_LIBRARY_PATH"
);
P_DEFINE_string
(
cuda_dir
,
""
,
P_DEFINE_string
(
cuda_dir
,
""
,
"Specify path for loading cuda library, such as libcublas, "
"Specify path for loading cuda library, such as libcublas, "
"libcurand. For instance, /usr/local/cuda/lib64. "
"libcurand. For instance, /usr/local/cuda/lib64.
(Note:
"
"
(Note:
libcudart can not be specified by cuda_dir, since some "
"libcudart can not be specified by cuda_dir, since some "
"build-in function in cudart already ran before main entry). "
"build-in function in cudart already ran before main entry). "
"If
empty [default]
, dlopen will search cuda from LD_LIBRARY_PATH"
);
"If
default
, dlopen will search cuda from LD_LIBRARY_PATH"
);
static
inline
std
::
string
join
(
const
std
::
string
&
part1
,
const
std
::
string
&
part2
)
{
static
inline
std
::
string
join
(
const
std
::
string
&
part1
,
const
std
::
string
&
part2
)
{
// directory separator
// directory separator
const
char
sep
=
'/'
;
const
char
sep
=
'/'
;
...
@@ -49,10 +50,10 @@ static inline std::string join(const std::string& part1, const std::string& part
...
@@ -49,10 +50,10 @@ static inline std::string join(const std::string& part1, const std::string& part
static
inline
void
GetDsoHandleFromDefaultPath
(
static
inline
void
GetDsoHandleFromDefaultPath
(
std
::
string
&
dso_path
,
void
**
dso_handle
,
int
dynload_flags
)
{
std
::
string
&
dso_path
,
void
**
dso_handle
,
int
dynload_flags
)
{
VLOG
(
3
)
<<
"Try to find cuda library: "
<<
dso_path
VLOG
(
3
)
<<
"Try to find cuda library: "
<<
dso_path
<<
" from default system path."
;
<<
" from default system path."
;
// default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH
// default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH
*
dso_handle
=
dlopen
(
dso_path
.
c_str
(),
dynload_flags
);
*
dso_handle
=
dlopen
(
dso_path
.
c_str
(),
dynload_flags
);
// DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to
// DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to
// bring System Integrity Projection (SIP), if dso_handle
// bring System Integrity Projection (SIP), if dso_handle
// is null, search from default package path in Mac OS.
// is null, search from default package path in Mac OS.
...
@@ -62,13 +63,13 @@ static inline void GetDsoHandleFromDefaultPath(
...
@@ -62,13 +63,13 @@ static inline void GetDsoHandleFromDefaultPath(
*
dso_handle
=
dlopen
(
dso_path
.
c_str
(),
dynload_flags
);
*
dso_handle
=
dlopen
(
dso_path
.
c_str
(),
dynload_flags
);
if
(
nullptr
==
*
dso_handle
)
{
if
(
nullptr
==
*
dso_handle
)
{
if
(
dso_path
==
"libcudnn.dylib"
)
{
if
(
dso_path
==
"libcudnn.dylib"
)
{
LOG
(
FATAL
)
<<
"Note: [Recommend] copy cudnn into /usr/local/cuda/
\n
"
LOG
(
FATAL
)
<<
"Note: [Recommend] copy cudnn into /usr/local/cuda/
\n
"
// NOLINT
<<
"For instance, sudo tar -xzf cudnn-7.5-osx-x64-v5.0-ga.tgz -C "
<<
"For instance, sudo tar -xzf cudnn-7.5-osx-x64-v5.0-ga.tgz -C "
// NOLINT
<<
"/usr/local
\n
sudo chmod a+r /usr/local/cuda/include/cudnn.h "
<<
"/usr/local
\n
sudo chmod a+r /usr/local/cuda/include/cudnn.h "
// NOLINT
<<
"/usr/local/cuda/lib/libcudnn*"
;
<<
"/usr/local/cuda/lib/libcudnn*"
;
}
}
}
}
}
}
#endif
#endif
}
}
...
@@ -96,19 +97,19 @@ static inline void GetDsoHandleFromSearchPath(
...
@@ -96,19 +97,19 @@ static inline void GetDsoHandleFromSearchPath(
CHECK
(
nullptr
!=
*
dso_handle
)
CHECK
(
nullptr
!=
*
dso_handle
)
<<
"Failed to find cuda library: "
<<
dlPath
<<
std
::
endl
<<
"Failed to find cuda library: "
<<
dlPath
<<
std
::
endl
<<
"Please specify its path correctly using one of the following
ideas:
\n
"
<<
"Please specify its path correctly using one of the following
ways:
\n
"
// NOLINT
<<
"
Idea
1. set cuda and cudnn lib path at runtime. "
<<
"
Method
1. set cuda and cudnn lib path at runtime. "
<<
"http://www.paddlepaddle.org/doc/ui/cmd_argument/argument_outline.html
\n
"
<<
"http://www.paddlepaddle.org/doc/ui/cmd_argument/argument_outline.html
\n
"
// NOLINT
<<
"For instance, issue command: paddle train --use_gpu=1 "
<<
"For instance, issue command: paddle train --use_gpu=1 "
<<
"--cuda_dir=/usr/local/cud
nn/lib --cudnn_dir=/usr/local/cudnn/lib ...
\n
"
<<
"--cuda_dir=/usr/local/cud
a/lib64 --cudnn_dir=/usr/local/cudnn/lib ...
\n
"
// NOLINT
<<
"
Idea
2. set environment variable LD_LIBRARY_PATH on Linux or "
<<
"
Method
2. set environment variable LD_LIBRARY_PATH on Linux or "
<<
"DYLD_LIBRARY_PATH on Mac OS.
\n
"
<<
"DYLD_LIBRARY_PATH on Mac OS.
\n
"
<<
"For instance, issue command: export LD_LIBRARY_PATH=...
\n
"
<<
"For instance, issue command: export LD_LIBRARY_PATH=...
\n
"
<<
"Note: After Mac OS 10.11, using the DYLD_LIBRARY_PATH is impossible "
<<
"Note: After Mac OS 10.11, using the DYLD_LIBRARY_PATH is impossible "
<<
"unless System Integrity Protection (SIP) is disabled. However,
@Idea 1"
<<
"unless System Integrity Protection (SIP) is disabled. However,
method 1 "
// NOLINT
<<
"always work well."
;
<<
"always work well."
;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录