Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
75d15719
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
75d15719
编写于
9月 03, 2019
作者:
T
Tao Luo
提交者:
GitHub
9月 03, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine PADDLE_ENFORCE codes for unify PADDLE_ASSERT_MSG (#19603)
test=develop
上级
1c2aae56
变更
19
隐藏空白更改
内联
并排
Showing
19 changed file
with
103 addition
and
91 deletion
+103
-91
paddle/fluid/framework/ir/node.h
paddle/fluid/framework/ir/node.h
+2
-2
paddle/fluid/framework/tensor_util.cc
paddle/fluid/framework/tensor_util.cc
+3
-3
paddle/fluid/framework/tensor_util.h
paddle/fluid/framework/tensor_util.h
+1
-1
paddle/fluid/operators/detail/safe_ref.h
paddle/fluid/operators/detail/safe_ref.h
+1
-1
paddle/fluid/operators/dgc_op.h
paddle/fluid/operators/dgc_op.h
+4
-3
paddle/fluid/operators/dropout_op.cu
paddle/fluid/operators/dropout_op.cu
+4
-3
paddle/fluid/operators/math/im2col.cu
paddle/fluid/operators/math/im2col.cu
+8
-8
paddle/fluid/operators/math/sample_prob.cu
paddle/fluid/operators/math/sample_prob.cu
+3
-3
paddle/fluid/operators/math/selected_rows_functor.cu
paddle/fluid/operators/math/selected_rows_functor.cu
+5
-5
paddle/fluid/operators/math/vol2col.cu
paddle/fluid/operators/math/vol2col.cu
+4
-4
paddle/fluid/operators/optimizers/lars_momentum_op.h
paddle/fluid/operators/optimizers/lars_momentum_op.h
+1
-1
paddle/fluid/operators/sample_logits_op.h
paddle/fluid/operators/sample_logits_op.h
+12
-10
paddle/fluid/operators/sum_op.h
paddle/fluid/operators/sum_op.h
+3
-3
paddle/fluid/operators/sync_batch_norm_op.cu
paddle/fluid/operators/sync_batch_norm_op.cu
+2
-2
paddle/fluid/platform/cuda_helper.h
paddle/fluid/platform/cuda_helper.h
+6
-4
paddle/fluid/platform/cudnn_helper.h
paddle/fluid/platform/cudnn_helper.h
+26
-20
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+2
-2
paddle/fluid/platform/nccl_helper.h
paddle/fluid/platform/nccl_helper.h
+12
-12
paddle/fluid/platform/profiler.cu
paddle/fluid/platform/profiler.cu
+4
-4
未找到文件。
paddle/fluid/framework/ir/node.h
浏览文件 @
75d15719
...
@@ -66,12 +66,12 @@ class Node {
...
@@ -66,12 +66,12 @@ class Node {
std
::
string
Name
()
const
{
return
name_
;
}
std
::
string
Name
()
const
{
return
name_
;
}
VarDesc
*
Var
()
const
{
VarDesc
*
Var
()
const
{
PADDLE_ENFORCE
(
IsVar
()
);
PADDLE_ENFORCE
_EQ
(
IsVar
(),
true
);
return
var_desc_
.
get
();
return
var_desc_
.
get
();
}
}
OpDesc
*
Op
()
const
{
OpDesc
*
Op
()
const
{
PADDLE_ENFORCE
(
IsOp
()
);
PADDLE_ENFORCE
_EQ
(
IsOp
(),
true
);
return
op_desc_
.
get
();
return
op_desc_
.
get
();
}
}
...
...
paddle/fluid/framework/tensor_util.cc
浏览文件 @
75d15719
...
@@ -53,7 +53,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
...
@@ -53,7 +53,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto
src_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
src_place
);
auto
src_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
src_place
);
auto
dst_cpu_place
=
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
);
auto
dst_cpu_place
=
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
);
auto
ctx_place
=
ctx
.
GetPlace
();
auto
ctx_place
=
ctx
.
GetPlace
();
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx_place
)
);
PADDLE_ENFORCE
_EQ
(
platform
::
is_gpu_place
(
ctx_place
),
true
);
auto
ctx_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx_place
);
auto
ctx_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx_place
);
PADDLE_ENFORCE_EQ
(
src_gpu_place
,
ctx_gpu_place
);
PADDLE_ENFORCE_EQ
(
src_gpu_place
,
ctx_gpu_place
);
auto
stream
=
auto
stream
=
...
@@ -64,7 +64,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
...
@@ -64,7 +64,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto
src_cpu_place
=
boost
::
get
<
platform
::
CPUPlace
>
(
src_place
);
auto
src_cpu_place
=
boost
::
get
<
platform
::
CPUPlace
>
(
src_place
);
auto
dst_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
dst_place
);
auto
dst_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
dst_place
);
auto
ctx_place
=
ctx
.
GetPlace
();
auto
ctx_place
=
ctx
.
GetPlace
();
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx_place
)
);
PADDLE_ENFORCE
_EQ
(
platform
::
is_gpu_place
(
ctx_place
),
true
);
auto
ctx_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx_place
);
auto
ctx_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx_place
);
PADDLE_ENFORCE_EQ
(
dst_gpu_place
,
ctx_gpu_place
);
PADDLE_ENFORCE_EQ
(
dst_gpu_place
,
ctx_gpu_place
);
auto
stream
=
auto
stream
=
...
@@ -75,7 +75,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
...
@@ -75,7 +75,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto
src_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
src_place
);
auto
src_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
src_place
);
auto
dst_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
dst_place
);
auto
dst_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
dst_place
);
auto
ctx_place
=
ctx
.
GetPlace
();
auto
ctx_place
=
ctx
.
GetPlace
();
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx_place
)
);
PADDLE_ENFORCE
_EQ
(
platform
::
is_gpu_place
(
ctx_place
),
true
);
auto
stream
=
auto
stream
=
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
ctx
).
stream
();
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
ctx
).
stream
();
if
(
platform
::
is_same_place
(
src_place
,
dst_place
))
{
if
(
platform
::
is_same_place
(
src_place
,
dst_place
))
{
...
...
paddle/fluid/framework/tensor_util.h
浏览文件 @
75d15719
...
@@ -146,7 +146,7 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
...
@@ -146,7 +146,7 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
dst
->
resize
(
src
.
numel
());
dst
->
resize
(
src
.
numel
());
auto
dst_ptr
=
static_cast
<
void
*>
(
dst
->
data
());
auto
dst_ptr
=
static_cast
<
void
*>
(
dst
->
data
());
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
src
.
place
())
);
PADDLE_ENFORCE
_EQ
(
platform
::
is_cpu_place
(
src
.
place
()),
true
);
memory
::
Copy
(
dst_place
,
dst_ptr
,
boost
::
get
<
platform
::
CPUPlace
>
(
src
.
place
()),
memory
::
Copy
(
dst_place
,
dst_ptr
,
boost
::
get
<
platform
::
CPUPlace
>
(
src
.
place
()),
src_ptr
,
size
);
src_ptr
,
size
);
...
...
paddle/fluid/operators/detail/safe_ref.h
浏览文件 @
75d15719
...
@@ -25,7 +25,7 @@ namespace detail {
...
@@ -25,7 +25,7 @@ namespace detail {
*/
*/
template
<
typename
T
,
typename
...
ARGS
>
template
<
typename
T
,
typename
...
ARGS
>
inline
T
&
Ref
(
T
*
ptr
,
ARGS
&&
...
args
)
{
inline
T
&
Ref
(
T
*
ptr
,
ARGS
&&
...
args
)
{
PADDLE_ENFORCE
(
ptr
!=
null
ptr
,
::
paddle
::
string
::
Sprintf
(
args
...));
PADDLE_ENFORCE
_NOT_NULL
(
ptr
,
::
paddle
::
string
::
Sprintf
(
args
...));
return
*
ptr
;
return
*
ptr
;
}
}
...
...
paddle/fluid/operators/dgc_op.h
浏览文件 @
75d15719
...
@@ -23,14 +23,14 @@ namespace operators {
...
@@ -23,14 +23,14 @@ namespace operators {
inline
float
get_period_sparcity
(
const
std
::
vector
<
float
>&
sparsity
,
inline
float
get_period_sparcity
(
const
std
::
vector
<
float
>&
sparsity
,
float
cur_step
,
float
rampup_steps
)
{
float
cur_step
,
float
rampup_steps
)
{
PADDLE_ENFORCE
(
static_cast
<
int
>
(
cur_step
)
>=
0
);
PADDLE_ENFORCE
_GE
(
static_cast
<
int
>
(
cur_step
),
0
);
size_t
idx
=
static_cast
<
int
>
(
cur_step
*
sparsity
.
size
()
/
rampup_steps
);
size_t
idx
=
static_cast
<
int
>
(
cur_step
*
sparsity
.
size
()
/
rampup_steps
);
if
(
idx
>=
sparsity
.
size
())
{
if
(
idx
>=
sparsity
.
size
())
{
return
0.999
;
return
0.999
;
}
}
PADDLE_ENFORCE
(
idx
<
sparsity
.
size
());
PADDLE_ENFORCE
_LT
(
idx
,
sparsity
.
size
());
return
sparsity
[
idx
];
return
sparsity
[
idx
];
}
}
...
@@ -63,7 +63,8 @@ class DGCOpKernel : public framework::OpKernel<T> {
...
@@ -63,7 +63,8 @@ class DGCOpKernel : public framework::OpKernel<T> {
float
ratio
=
float
ratio
=
1
-
get_period_sparcity
(
sparsity
,
static_cast
<
float
>
(
*
current_step
),
1
-
get_period_sparcity
(
sparsity
,
static_cast
<
float
>
(
*
current_step
),
rampup_step
);
rampup_step
);
PADDLE_ENFORCE
(
ratio
>
0.0
&&
ratio
<
1.0
);
PADDLE_ENFORCE_GE
(
ratio
,
0.0
);
PADDLE_ENFORCE_LT
(
ratio
,
1.0
);
int
k
=
static_cast
<
int
>
(
g
->
numel
()
*
ratio
);
int
k
=
static_cast
<
int
>
(
g
->
numel
()
*
ratio
);
VLOG
(
10
)
<<
"m:"
<<
m
<<
", use_nesterov:"
<<
use_nesterov
VLOG
(
10
)
<<
"m:"
<<
m
<<
", use_nesterov:"
<<
use_nesterov
...
...
paddle/fluid/operators/dropout_op.cu
浏览文件 @
75d15719
...
@@ -86,9 +86,10 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
...
@@ -86,9 +86,10 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
auto
*
x_data
=
x
->
data
<
T
>
();
auto
*
x_data
=
x
->
data
<
T
>
();
auto
*
y_data
=
y
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
*
y_data
=
y
->
mutable_data
<
T
>
(
context
.
GetPlace
());
if
(
dropout_prob
==
1.0
f
)
{
if
(
dropout_prob
==
1.0
f
)
{
PADDLE_ENFORCE
(
cudaMemsetAsync
(
y_data
,
0
,
x_numel
*
sizeof
(
T
),
stream
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
PADDLE_ENFORCE
(
cudaMemsetAsync
(
mask_data
,
0
,
cudaMemsetAsync
(
y_data
,
0
,
x_numel
*
sizeof
(
T
),
stream
));
x_numel
*
sizeof
(
*
mask_data
),
stream
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaMemsetAsync
(
mask_data
,
0
,
x_numel
*
sizeof
(
*
mask_data
),
stream
));
return
;
return
;
}
}
...
...
paddle/fluid/operators/math/im2col.cu
浏览文件 @
75d15719
...
@@ -66,8 +66,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
...
@@ -66,8 +66,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
)
{
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
_EQ
(
im
.
dims
().
size
(),
3
);
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
5
);
PADDLE_ENFORCE
_EQ
(
col
->
dims
().
size
(),
5
);
int
im_channels
=
im
.
dims
()[
0
];
int
im_channels
=
im
.
dims
()[
0
];
int
im_height
=
im
.
dims
()[
1
];
int
im_height
=
im
.
dims
()[
1
];
...
@@ -152,8 +152,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
...
@@ -152,8 +152,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
)
{
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
)
{
PADDLE_ENFORCE
(
im
->
dims
().
size
()
==
3
);
PADDLE_ENFORCE
_EQ
(
im
->
dims
().
size
(),
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
PADDLE_ENFORCE
_EQ
(
col
.
dims
().
size
(),
5
);
int
im_channels
=
im
->
dims
()[
0
];
int
im_channels
=
im
->
dims
()[
0
];
int
im_height
=
im
->
dims
()[
1
];
int
im_height
=
im
->
dims
()[
1
];
...
@@ -249,8 +249,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
...
@@ -249,8 +249,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
)
{
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
_EQ
(
im
.
dims
().
size
(),
3
);
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
5
);
PADDLE_ENFORCE
_EQ
(
col
->
dims
().
size
(),
5
);
int
im_channels
=
im
.
dims
()[
0
];
int
im_channels
=
im
.
dims
()[
0
];
int
im_height
=
im
.
dims
()[
1
];
int
im_height
=
im
.
dims
()[
1
];
int
im_width
=
im
.
dims
()[
2
];
int
im_width
=
im
.
dims
()[
2
];
...
@@ -331,8 +331,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
...
@@ -331,8 +331,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
)
{
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
)
{
PADDLE_ENFORCE
(
im
->
dims
().
size
()
==
3
);
PADDLE_ENFORCE
_EQ
(
im
->
dims
().
size
(),
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
PADDLE_ENFORCE
_EQ
(
col
.
dims
().
size
(),
5
);
int
im_channels
=
im
->
dims
()[
0
];
int
im_channels
=
im
->
dims
()[
0
];
int
im_height
=
im
->
dims
()[
1
];
int
im_height
=
im
->
dims
()[
1
];
int
im_width
=
im
->
dims
()[
2
];
int
im_width
=
im
->
dims
()[
2
];
...
...
paddle/fluid/operators/math/sample_prob.cu
浏览文件 @
75d15719
...
@@ -142,9 +142,9 @@ void GPUSampleWithProb<T>::operator()(
...
@@ -142,9 +142,9 @@ void GPUSampleWithProb<T>::operator()(
int
num_tries
=
UniqSampler
<
T
>
(
sampler
,
num_samples
,
s_data
);
int
num_tries
=
UniqSampler
<
T
>
(
sampler
,
num_samples
,
s_data
);
VLOG
(
1
)
<<
"num_tries: "
<<
num_tries
;
VLOG
(
1
)
<<
"num_tries: "
<<
num_tries
;
PADDLE_ENFORCE
(
cudaMemcpy
(
samples_data
+
num_true
,
s_data
,
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaMemcpy
(
samples_data
+
num_true
,
s_data
,
sizeof
(
int64_t
)
*
num_samples
,
sizeof
(
int64_t
)
*
num_samples
,
cudaMemcpyHostToDevice
));
cudaMemcpyHostToDevice
));
int
threads
=
512
;
int
threads
=
512
;
const
size_t
size
=
batch_size
*
num_sampled_classes
;
const
size_t
size
=
batch_size
*
num_sampled_classes
;
...
...
paddle/fluid/operators/math/selected_rows_functor.cu
浏览文件 @
75d15719
...
@@ -55,11 +55,11 @@ struct SelectedRowsAdd<platform::CUDADeviceContext, T> {
...
@@ -55,11 +55,11 @@ struct SelectedRowsAdd<platform::CUDADeviceContext, T> {
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
auto
in1_place
=
input1
.
place
();
auto
in1_place
=
input1
.
place
();
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
in1_place
)
);
PADDLE_ENFORCE
_EQ
(
platform
::
is_gpu_place
(
in1_place
),
true
);
auto
in2_place
=
input2
.
place
();
auto
in2_place
=
input2
.
place
();
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
in2_place
)
);
PADDLE_ENFORCE
_EQ
(
platform
::
is_gpu_place
(
in2_place
),
true
);
auto
out_place
=
context
.
GetPlace
();
auto
out_place
=
context
.
GetPlace
();
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
out_place
)
);
PADDLE_ENFORCE
_EQ
(
platform
::
is_gpu_place
(
out_place
),
true
);
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
out_data
,
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
out_data
,
boost
::
get
<
platform
::
CUDAPlace
>
(
in1_place
),
in1_data
,
boost
::
get
<
platform
::
CUDAPlace
>
(
in1_place
),
in1_data
,
...
@@ -162,9 +162,9 @@ struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
...
@@ -162,9 +162,9 @@ struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
}
}
auto
in1_place
=
input1
.
place
();
auto
in1_place
=
input1
.
place
();
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
in1_place
)
);
PADDLE_ENFORCE
_EQ
(
platform
::
is_gpu_place
(
in1_place
),
true
);
auto
in2_place
=
input2
->
place
();
auto
in2_place
=
input2
->
place
();
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
in2_place
)
);
PADDLE_ENFORCE
_EQ
(
platform
::
is_gpu_place
(
in2_place
),
true
);
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
auto
*
in2_data
=
in2_value
->
data
<
T
>
();
auto
*
in2_data
=
in2_value
->
data
<
T
>
();
...
...
paddle/fluid/operators/math/vol2col.cu
浏览文件 @
75d15719
...
@@ -78,8 +78,8 @@ class Vol2ColFunctor<platform::CUDADeviceContext, T> {
...
@@ -78,8 +78,8 @@ class Vol2ColFunctor<platform::CUDADeviceContext, T> {
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
col
)
const
{
framework
::
Tensor
*
col
)
const
{
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
PADDLE_ENFORCE
_EQ
(
vol
.
dims
().
size
(),
4
);
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
7
);
PADDLE_ENFORCE
_EQ
(
col
->
dims
().
size
(),
7
);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_channels
=
vol
.
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_depth
=
vol
.
dims
()[
1
];
...
@@ -204,8 +204,8 @@ class Col2VolFunctor<platform::CUDADeviceContext, T> {
...
@@ -204,8 +204,8 @@ class Col2VolFunctor<platform::CUDADeviceContext, T> {
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
vol
)
const
{
framework
::
Tensor
*
vol
)
const
{
PADDLE_ENFORCE
(
vol
->
dims
().
size
()
==
4
);
PADDLE_ENFORCE
_EQ
(
vol
->
dims
().
size
(),
4
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
PADDLE_ENFORCE
_EQ
(
col
.
dims
().
size
(),
7
);
int
input_channels
=
vol
->
dims
()[
0
];
int
input_channels
=
vol
->
dims
()[
0
];
int
input_depth
=
vol
->
dims
()[
1
];
int
input_depth
=
vol
->
dims
()[
1
];
...
...
paddle/fluid/operators/optimizers/lars_momentum_op.h
浏览文件 @
75d15719
...
@@ -30,7 +30,7 @@ class LarsMomentumOpKernel : public framework::OpKernel<T> {
...
@@ -30,7 +30,7 @@ class LarsMomentumOpKernel : public framework::OpKernel<T> {
auto
learning_rate
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"LearningRate"
);
auto
learning_rate
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"LearningRate"
);
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
// only support dense for now.
// only support dense for now.
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
()
);
PADDLE_ENFORCE
_EQ
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
true
);
auto
grad
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Grad"
);
auto
grad
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Grad"
);
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
...
paddle/fluid/operators/sample_logits_op.h
浏览文件 @
75d15719
...
@@ -49,11 +49,12 @@ static void CPUTakeAlongD1(const platform::DeviceContext& ctx,
...
@@ -49,11 +49,12 @@ static void CPUTakeAlongD1(const platform::DeviceContext& ctx,
const
framework
::
Tensor
&
array
,
const
framework
::
Tensor
&
array
,
const
framework
::
Tensor
&
index
,
const
framework
::
Tensor
&
index
,
framework
::
Tensor
*
value
)
{
framework
::
Tensor
*
value
)
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
())
);
PADDLE_ENFORCE
_EQ
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
true
);
// UNDERSTAND: check shape src(B, C), index(B, K), out should also be (B, K)
// UNDERSTAND: check shape src(B, C), index(B, K), out should also be (B, K)
PADDLE_ENFORCE
(
index
.
dims
().
size
()
==
2
&&
array
.
dims
().
size
()
==
2
&&
PADDLE_ENFORCE_EQ
(
index
.
dims
().
size
(),
2
);
index
.
dims
()[
0
]
==
array
.
dims
()[
0
]
&&
PADDLE_ENFORCE_EQ
(
array
.
dims
().
size
(),
2
);
index
.
dims
()
==
value
->
dims
());
PADDLE_ENFORCE_EQ
(
index
.
dims
()[
0
],
array
.
dims
()[
0
]);
PADDLE_ENFORCE_EQ
(
index
.
dims
(),
value
->
dims
());
const
auto
batch_size
=
index
.
dims
()[
0
];
const
auto
batch_size
=
index
.
dims
()[
0
];
const
auto
num_take
=
index
.
dims
()[
1
];
const
auto
num_take
=
index
.
dims
()[
1
];
...
@@ -88,11 +89,12 @@ static void CPUPutAlongD1(const platform::DeviceContext& ctx,
...
@@ -88,11 +89,12 @@ static void CPUPutAlongD1(const platform::DeviceContext& ctx,
framework
::
Tensor
*
array
,
framework
::
Tensor
*
array
,
const
framework
::
Tensor
&
index
,
const
framework
::
Tensor
&
index
,
const
framework
::
Tensor
&
value
)
{
const
framework
::
Tensor
&
value
)
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
())
);
PADDLE_ENFORCE
_EQ
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
true
);
// UNDERSTAND: check shape src(B, C), index(B, K), out should also be (B, K)
// UNDERSTAND: check shape src(B, C), index(B, K), out should also be (B, K)
PADDLE_ENFORCE
(
index
.
dims
().
size
()
==
2
&&
array
->
dims
().
size
()
==
2
&&
PADDLE_ENFORCE_EQ
(
index
.
dims
().
size
(),
2
);
index
.
dims
()[
0
]
==
array
->
dims
()[
0
]
&&
PADDLE_ENFORCE_EQ
(
array
->
dims
().
size
(),
2
);
index
.
dims
()
==
value
.
dims
());
PADDLE_ENFORCE_EQ
(
index
.
dims
()[
0
],
array
->
dims
()[
0
]);
PADDLE_ENFORCE_EQ
(
index
.
dims
(),
value
.
dims
());
const
auto
batch_size
=
index
.
dims
()[
0
];
const
auto
batch_size
=
index
.
dims
()[
0
];
const
auto
num_put
=
index
.
dims
()[
1
];
const
auto
num_put
=
index
.
dims
()[
1
];
auto
array_dims
=
array
->
dims
();
auto
array_dims
=
array
->
dims
();
...
@@ -147,8 +149,8 @@ class SampleLogitsKernel : public framework::OpKernel<T> {
...
@@ -147,8 +149,8 @@ class SampleLogitsKernel : public framework::OpKernel<T> {
public:
public:
using
Tensor
=
framework
::
Tensor
;
using
Tensor
=
framework
::
Tensor
;
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
context
.
GetPlace
())
,
PADDLE_ENFORCE
_EQ
(
platform
::
is_cpu_place
(
context
.
GetPlace
()),
true
,
"This kernel only runs on CPU."
);
"This kernel only runs on CPU."
);
VLOG
(
3
)
<<
"Enter SampleLogitsKernel"
;
VLOG
(
3
)
<<
"Enter SampleLogitsKernel"
;
// get necessary inputs
// get necessary inputs
const
Tensor
*
logits
=
context
.
Input
<
Tensor
>
(
"Logits"
);
const
Tensor
*
logits
=
context
.
Input
<
Tensor
>
(
"Logits"
);
...
...
paddle/fluid/operators/sum_op.h
浏览文件 @
75d15719
...
@@ -92,8 +92,8 @@ void LodTensorArrayCompute(const framework::ExecutionContext &context) {
...
@@ -92,8 +92,8 @@ void LodTensorArrayCompute(const framework::ExecutionContext &context) {
bool
in_place
=
out_var
==
in_vars
[
0
];
bool
in_place
=
out_var
==
in_vars
[
0
];
auto
&
out_array
=
*
out_var
->
GetMutable
<
framework
::
LoDTensorArray
>
();
auto
&
out_array
=
*
out_var
->
GetMutable
<
framework
::
LoDTensorArray
>
();
for
(
size_t
i
=
in_place
?
1
:
0
;
i
<
in_vars
.
size
();
++
i
)
{
for
(
size_t
i
=
in_place
?
1
:
0
;
i
<
in_vars
.
size
();
++
i
)
{
PADDLE_ENFORCE
(
in_vars
[
i
]
->
IsType
<
framework
::
LoDTensorArray
>
()
,
PADDLE_ENFORCE
_EQ
(
in_vars
[
i
]
->
IsType
<
framework
::
LoDTensorArray
>
(),
true
,
"Only support all inputs are TensorArray"
);
"Only support all inputs are TensorArray"
);
auto
&
in_array
=
in_vars
[
i
]
->
Get
<
framework
::
LoDTensorArray
>
();
auto
&
in_array
=
in_vars
[
i
]
->
Get
<
framework
::
LoDTensorArray
>
();
for
(
size_t
i
=
0
;
i
<
in_array
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
in_array
.
size
();
++
i
)
{
...
@@ -106,7 +106,7 @@ void LodTensorArrayCompute(const framework::ExecutionContext &context) {
...
@@ -106,7 +106,7 @@ void LodTensorArrayCompute(const framework::ExecutionContext &context) {
context
.
device_context
(),
&
out_array
[
i
]);
context
.
device_context
(),
&
out_array
[
i
]);
out_array
[
i
].
set_lod
(
in_array
[
i
].
lod
());
out_array
[
i
].
set_lod
(
in_array
[
i
].
lod
());
}
else
{
}
else
{
PADDLE_ENFORCE
(
out_array
[
i
].
lod
()
==
in_array
[
i
].
lod
());
PADDLE_ENFORCE
_EQ
(
out_array
[
i
].
lod
(),
in_array
[
i
].
lod
());
auto
in
=
EigenVector
<
T
>::
Flatten
(
in_array
[
i
]);
auto
in
=
EigenVector
<
T
>::
Flatten
(
in_array
[
i
]);
auto
result
=
EigenVector
<
T
>::
Flatten
(
out_array
[
i
]);
auto
result
=
EigenVector
<
T
>::
Flatten
(
out_array
[
i
]);
result
.
device
(
*
context
.
template
device_context
<
DeviceContext
>()
result
.
device
(
*
context
.
template
device_context
<
DeviceContext
>()
...
...
paddle/fluid/operators/sync_batch_norm_op.cu
浏览文件 @
75d15719
...
@@ -178,7 +178,7 @@ class SyncBatchNormKernel : public framework::OpKernel<T> {
...
@@ -178,7 +178,7 @@ class SyncBatchNormKernel : public framework::OpKernel<T> {
int
dtype
=
platform
::
ToNCCLDataType
(
x
->
type
());
int
dtype
=
platform
::
ToNCCLDataType
(
x
->
type
());
// In-place operation
// In-place operation
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclAllReduce
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
platform
::
dynload
::
ncclAllReduce
(
stats
,
stats
,
2
*
C
+
1
,
static_cast
<
ncclDataType_t
>
(
dtype
),
ncclSum
,
stats
,
stats
,
2
*
C
+
1
,
static_cast
<
ncclDataType_t
>
(
dtype
),
ncclSum
,
comm
,
stream
));
comm
,
stream
));
...
@@ -398,7 +398,7 @@ class SyncBatchNormGradKernel : public framework::OpKernel<T> {
...
@@ -398,7 +398,7 @@ class SyncBatchNormGradKernel : public framework::OpKernel<T> {
}
}
int
dtype
=
platform
::
ToNCCLDataType
(
x
->
type
());
int
dtype
=
platform
::
ToNCCLDataType
(
x
->
type
());
// In-place operation
// In-place operation
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclAllReduce
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
platform
::
dynload
::
ncclAllReduce
(
stats
,
stats
,
2
*
C
+
1
,
static_cast
<
ncclDataType_t
>
(
dtype
),
ncclSum
,
stats
,
stats
,
2
*
C
+
1
,
static_cast
<
ncclDataType_t
>
(
dtype
),
ncclSum
,
comm
,
stream
));
comm
,
stream
));
...
...
paddle/fluid/platform/cuda_helper.h
浏览文件 @
75d15719
...
@@ -29,17 +29,19 @@ namespace platform {
...
@@ -29,17 +29,19 @@ namespace platform {
class
CublasHandleHolder
{
class
CublasHandleHolder
{
public:
public:
CublasHandleHolder
(
cudaStream_t
stream
,
cublasMath_t
math_type
)
{
CublasHandleHolder
(
cudaStream_t
stream
,
cublasMath_t
math_type
)
{
PADDLE_ENFORCE
(
dynload
::
cublasCreate
(
&
handle_
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cublasCreate
(
&
handle_
));
PADDLE_ENFORCE
(
dynload
::
cublasSetStream
(
handle_
,
stream
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cublasSetStream
(
handle_
,
stream
));
#if CUDA_VERSION >= 9000
#if CUDA_VERSION >= 9000
if
(
math_type
==
CUBLAS_TENSOR_OP_MATH
)
{
if
(
math_type
==
CUBLAS_TENSOR_OP_MATH
)
{
PADDLE_ENFORCE
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cublasSetMathMode
(
handle_
,
CUBLAS_TENSOR_OP_MATH
));
dynload
::
cublasSetMathMode
(
handle_
,
CUBLAS_TENSOR_OP_MATH
));
}
}
#endif
#endif
}
}
~
CublasHandleHolder
()
{
PADDLE_ENFORCE
(
dynload
::
cublasDestroy
(
handle_
));
}
~
CublasHandleHolder
()
{
PADDLE_ENFORCE_CUDA_SUCCESS
(
dynload
::
cublasDestroy
(
handle_
));
}
template
<
typename
Callback
>
template
<
typename
Callback
>
inline
void
Call
(
Callback
&&
callback
)
const
{
inline
void
Call
(
Callback
&&
callback
)
const
{
...
...
paddle/fluid/platform/cudnn_helper.h
浏览文件 @
75d15719
...
@@ -221,10 +221,10 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat(
...
@@ -221,10 +221,10 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat(
class
ScopedTensorDescriptor
{
class
ScopedTensorDescriptor
{
public:
public:
ScopedTensorDescriptor
()
{
ScopedTensorDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnCreateTensorDescriptor
(
&
desc_
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnCreateTensorDescriptor
(
&
desc_
));
}
}
~
ScopedTensorDescriptor
()
{
~
ScopedTensorDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnDestroyTensorDescriptor
(
desc_
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnDestroyTensorDescriptor
(
desc_
));
}
}
inline
cudnnTensorDescriptor_t
descriptor
(
const
cudnnTensorFormat_t
format
,
inline
cudnnTensorDescriptor_t
descriptor
(
const
cudnnTensorFormat_t
format
,
...
@@ -243,7 +243,7 @@ class ScopedTensorDescriptor {
...
@@ -243,7 +243,7 @@ class ScopedTensorDescriptor {
if
(
groups
>
1
)
{
if
(
groups
>
1
)
{
dims_with_group
[
1
]
=
dims_with_group
[
1
]
/
groups
;
dims_with_group
[
1
]
=
dims_with_group
[
1
]
/
groups
;
}
}
PADDLE_ENFORCE
(
dynload
::
cudnnSetTensorNdDescriptor
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnSetTensorNdDescriptor
(
desc_
,
type
,
dims_with_group
.
size
(),
dims_with_group
.
data
(),
desc_
,
type
,
dims_with_group
.
size
(),
dims_with_group
.
data
(),
strides
.
data
()));
strides
.
data
()));
return
desc_
;
return
desc_
;
...
@@ -265,10 +265,10 @@ class ScopedTensorDescriptor {
...
@@ -265,10 +265,10 @@ class ScopedTensorDescriptor {
class
ScopedFilterDescriptor
{
class
ScopedFilterDescriptor
{
public:
public:
ScopedFilterDescriptor
()
{
ScopedFilterDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnCreateFilterDescriptor
(
&
desc_
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnCreateFilterDescriptor
(
&
desc_
));
}
}
~
ScopedFilterDescriptor
()
{
~
ScopedFilterDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnDestroyFilterDescriptor
(
desc_
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnDestroyFilterDescriptor
(
desc_
));
}
}
inline
cudnnFilterDescriptor_t
descriptor
(
const
cudnnTensorFormat_t
format
,
inline
cudnnFilterDescriptor_t
descriptor
(
const
cudnnTensorFormat_t
format
,
...
@@ -284,7 +284,7 @@ class ScopedFilterDescriptor {
...
@@ -284,7 +284,7 @@ class ScopedFilterDescriptor {
kernel_with_group
[
0
]
/=
groups
;
kernel_with_group
[
0
]
/=
groups
;
// NOTE: input filter(C) of the filter is already asserted to be C/groups.
// NOTE: input filter(C) of the filter is already asserted to be C/groups.
}
}
PADDLE_ENFORCE
(
dynload
::
cudnnSetFilterNdDescriptor
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnSetFilterNdDescriptor
(
desc_
,
type
,
format
,
kernel_with_group
.
size
(),
desc_
,
type
,
format
,
kernel_with_group
.
size
(),
kernel_with_group
.
data
()));
kernel_with_group
.
data
()));
return
desc_
;
return
desc_
;
...
@@ -306,10 +306,12 @@ class ScopedFilterDescriptor {
...
@@ -306,10 +306,12 @@ class ScopedFilterDescriptor {
class
ScopedConvolutionDescriptor
{
class
ScopedConvolutionDescriptor
{
public:
public:
ScopedConvolutionDescriptor
()
{
ScopedConvolutionDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnCreateConvolutionDescriptor
(
&
desc_
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
dynload
::
cudnnCreateConvolutionDescriptor
(
&
desc_
));
}
}
~
ScopedConvolutionDescriptor
()
{
~
ScopedConvolutionDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnDestroyConvolutionDescriptor
(
desc_
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
dynload
::
cudnnDestroyConvolutionDescriptor
(
desc_
));
}
}
inline
cudnnConvolutionDescriptor_t
descriptor
(
inline
cudnnConvolutionDescriptor_t
descriptor
(
...
@@ -332,7 +334,7 @@ class ScopedConvolutionDescriptor {
...
@@ -332,7 +334,7 @@ class ScopedConvolutionDescriptor {
cudnnDataType_t
compute_type
=
cudnnDataType_t
compute_type
=
(
type
==
CUDNN_DATA_DOUBLE
)
?
CUDNN_DATA_DOUBLE
:
CUDNN_DATA_FLOAT
;
(
type
==
CUDNN_DATA_DOUBLE
)
?
CUDNN_DATA_DOUBLE
:
CUDNN_DATA_FLOAT
;
PADDLE_ENFORCE
(
dynload
::
cudnnSetConvolutionNdDescriptor
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnSetConvolutionNdDescriptor
(
desc_
,
pads
.
size
(),
pads
.
data
(),
strides
.
data
(),
dilations
.
data
(),
desc_
,
pads
.
size
(),
pads
.
data
(),
strides
.
data
(),
dilations
.
data
(),
CUDNN_CROSS_CORRELATION
,
compute_type
));
CUDNN_CROSS_CORRELATION
,
compute_type
));
return
desc_
;
return
desc_
;
...
@@ -353,10 +355,10 @@ class ScopedConvolutionDescriptor {
...
@@ -353,10 +355,10 @@ class ScopedConvolutionDescriptor {
class
ScopedPoolingDescriptor
{
class
ScopedPoolingDescriptor
{
public:
public:
ScopedPoolingDescriptor
()
{
ScopedPoolingDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnCreatePoolingDescriptor
(
&
desc_
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnCreatePoolingDescriptor
(
&
desc_
));
}
}
~
ScopedPoolingDescriptor
()
{
~
ScopedPoolingDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnDestroyPoolingDescriptor
(
desc_
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnDestroyPoolingDescriptor
(
desc_
));
}
}
inline
cudnnPoolingDescriptor_t
descriptor
(
const
PoolingMode
&
mode
,
inline
cudnnPoolingDescriptor_t
descriptor
(
const
PoolingMode
&
mode
,
...
@@ -365,7 +367,7 @@ class ScopedPoolingDescriptor {
...
@@ -365,7 +367,7 @@ class ScopedPoolingDescriptor {
const
std
::
vector
<
int
>&
strides
)
{
const
std
::
vector
<
int
>&
strides
)
{
PADDLE_ENFORCE_EQ
(
kernel
.
size
(),
pads
.
size
());
PADDLE_ENFORCE_EQ
(
kernel
.
size
(),
pads
.
size
());
PADDLE_ENFORCE_EQ
(
kernel
.
size
(),
strides
.
size
());
PADDLE_ENFORCE_EQ
(
kernel
.
size
(),
strides
.
size
());
PADDLE_ENFORCE
(
dynload
::
cudnnSetPoolingNdDescriptor
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnSetPoolingNdDescriptor
(
desc_
,
(
GetPoolingMode
(
mode
)),
desc_
,
(
GetPoolingMode
(
mode
)),
CUDNN_PROPAGATE_NAN
,
// Always propagate nans.
CUDNN_PROPAGATE_NAN
,
// Always propagate nans.
kernel
.
size
(),
kernel
.
data
(),
pads
.
data
(),
strides
.
data
()));
kernel
.
size
(),
kernel
.
data
(),
pads
.
data
(),
strides
.
data
()));
...
@@ -380,16 +382,18 @@ class ScopedPoolingDescriptor {
...
@@ -380,16 +382,18 @@ class ScopedPoolingDescriptor {
class
ScopedSpatialTransformerDescriptor
{
class
ScopedSpatialTransformerDescriptor
{
public:
public:
ScopedSpatialTransformerDescriptor
()
{
ScopedSpatialTransformerDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnCreateSpatialTransformerDescriptor
(
&
desc_
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
dynload
::
cudnnCreateSpatialTransformerDescriptor
(
&
desc_
));
}
}
~
ScopedSpatialTransformerDescriptor
()
{
~
ScopedSpatialTransformerDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnDestroySpatialTransformerDescriptor
(
desc_
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
dynload
::
cudnnDestroySpatialTransformerDescriptor
(
desc_
));
}
}
template
<
typename
T
>
template
<
typename
T
>
inline
cudnnSpatialTransformerDescriptor_t
descriptor
(
const
int
nbDims
,
inline
cudnnSpatialTransformerDescriptor_t
descriptor
(
const
int
nbDims
,
const
int
dimA
[])
{
const
int
dimA
[])
{
PADDLE_ENFORCE
(
dynload
::
cudnnSetSpatialTransformerNdDescriptor
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnSetSpatialTransformerNdDescriptor
(
desc_
,
CUDNN_SAMPLER_BILINEAR
,
CudnnDataType
<
T
>::
type
,
nbDims
,
dimA
));
desc_
,
CUDNN_SAMPLER_BILINEAR
,
CudnnDataType
<
T
>::
type
,
nbDims
,
dimA
));
return
desc_
;
return
desc_
;
}
}
...
@@ -402,10 +406,12 @@ class ScopedSpatialTransformerDescriptor {
...
@@ -402,10 +406,12 @@ class ScopedSpatialTransformerDescriptor {
class
ScopedActivationDescriptor
{
class
ScopedActivationDescriptor
{
public:
public:
ScopedActivationDescriptor
()
{
ScopedActivationDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnCreateActivationDescriptor
(
&
desc_
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
dynload
::
cudnnCreateActivationDescriptor
(
&
desc_
));
}
}
~
ScopedActivationDescriptor
()
{
~
ScopedActivationDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnDestroyActivationDescriptor
(
desc_
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
dynload
::
cudnnDestroyActivationDescriptor
(
desc_
));
}
}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -467,15 +473,15 @@ inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
...
@@ -467,15 +473,15 @@ inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
class
ScopedCTCLossDescriptor
{
class
ScopedCTCLossDescriptor
{
public:
public:
ScopedCTCLossDescriptor
()
{
ScopedCTCLossDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnCreateCTCLossDescriptor
(
&
desc_
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnCreateCTCLossDescriptor
(
&
desc_
));
}
}
~
ScopedCTCLossDescriptor
()
{
~
ScopedCTCLossDescriptor
()
{
PADDLE_ENFORCE
(
dynload
::
cudnnDestroyCTCLossDescriptor
(
desc_
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnDestroyCTCLossDescriptor
(
desc_
));
}
}
template
<
typename
T
>
template
<
typename
T
>
inline
cudnnCTCLossDescriptor_t
descriptor
()
{
inline
cudnnCTCLossDescriptor_t
descriptor
()
{
PADDLE_ENFORCE
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
cudnnSetCTCLossDescriptor
(
desc_
,
CudnnDataType
<
T
>::
type
));
dynload
::
cudnnSetCTCLossDescriptor
(
desc_
,
CudnnDataType
<
T
>::
type
));
return
desc_
;
return
desc_
;
}
}
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
75d15719
...
@@ -167,7 +167,7 @@ class CudnnHolder {
...
@@ -167,7 +167,7 @@ class CudnnHolder {
inline
void
ResetWorkspace
()
{
inline
void
ResetWorkspace
()
{
if
(
workspace_
)
{
if
(
workspace_
)
{
// Maybe someone is using the current workspace
// Maybe someone is using the current workspace
PADDLE_ENFORCE
(
cudaStreamSynchronize
(
*
stream_
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaStreamSynchronize
(
*
stream_
));
workspace_
=
nullptr
;
workspace_
=
nullptr
;
}
}
}
}
...
@@ -306,7 +306,7 @@ class CUDADeviceContext : public DeviceContext {
...
@@ -306,7 +306,7 @@ class CUDADeviceContext : public DeviceContext {
template
<
typename
Callback
>
template
<
typename
Callback
>
void
RecordEvent
(
cudaEvent_t
ev
,
Callback
callback
)
{
void
RecordEvent
(
cudaEvent_t
ev
,
Callback
callback
)
{
callback
();
callback
();
PADDLE_ENFORCE
(
cudaEventRecord
(
ev
,
stream_
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaEventRecord
(
ev
,
stream_
));
}
}
template
<
typename
Callback
>
template
<
typename
Callback
>
...
...
paddle/fluid/platform/nccl_helper.h
浏览文件 @
75d15719
...
@@ -63,11 +63,11 @@ class NCCLGroupGuard {
...
@@ -63,11 +63,11 @@ class NCCLGroupGuard {
inline
NCCLGroupGuard
()
{
inline
NCCLGroupGuard
()
{
NCCLMutex
().
lock
();
NCCLMutex
().
lock
();
PADDLE_ENFORCE
(
dynload
::
ncclGroupStart
());
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
ncclGroupStart
());
}
}
inline
~
NCCLGroupGuard
()
{
inline
~
NCCLGroupGuard
()
{
PADDLE_ENFORCE
(
dynload
::
ncclGroupEnd
());
PADDLE_ENFORCE
_CUDA_SUCCESS
(
dynload
::
ncclGroupEnd
());
NCCLMutex
().
unlock
();
NCCLMutex
().
unlock
();
}
}
};
};
...
@@ -94,7 +94,7 @@ struct NCCLContextMap {
...
@@ -94,7 +94,7 @@ struct NCCLContextMap {
explicit
NCCLContextMap
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
explicit
NCCLContextMap
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
ncclUniqueId
*
nccl_id
=
nullptr
,
ncclUniqueId
*
nccl_id
=
nullptr
,
size_t
num_trainers
=
1
,
size_t
trainer_id
=
0
)
{
size_t
num_trainers
=
1
,
size_t
trainer_id
=
0
)
{
PADDLE_ENFORCE
(
!
places
.
empty
()
);
PADDLE_ENFORCE
_EQ
(
!
places
.
empty
(),
true
);
order_
.
reserve
(
places
.
size
());
order_
.
reserve
(
places
.
size
());
for
(
auto
&
p
:
places
)
{
for
(
auto
&
p
:
places
)
{
int
dev_id
=
boost
::
get
<
CUDAPlace
>
(
p
).
device
;
int
dev_id
=
boost
::
get
<
CUDAPlace
>
(
p
).
device
;
...
@@ -109,7 +109,7 @@ struct NCCLContextMap {
...
@@ -109,7 +109,7 @@ struct NCCLContextMap {
// if num_trainers == 1, should create a new nccl id for local comms.
// if num_trainers == 1, should create a new nccl id for local comms.
if
(
num_trainers
==
1
&&
nccl_id
==
nullptr
)
{
if
(
num_trainers
==
1
&&
nccl_id
==
nullptr
)
{
std
::
lock_guard
<
std
::
mutex
>
guard
(
NCCLGroupGuard
::
NCCLMutex
());
std
::
lock_guard
<
std
::
mutex
>
guard
(
NCCLGroupGuard
::
NCCLMutex
());
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclCommInitAll
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
platform
::
dynload
::
ncclCommInitAll
(
comms
.
get
(),
static_cast
<
int
>
(
order_
.
size
()),
order_
.
data
()));
comms
.
get
(),
static_cast
<
int
>
(
order_
.
size
()),
order_
.
data
()));
}
else
{
}
else
{
PADDLE_ENFORCE_NOT_NULL
(
nccl_id
);
PADDLE_ENFORCE_NOT_NULL
(
nccl_id
);
...
@@ -126,8 +126,8 @@ struct NCCLContextMap {
...
@@ -126,8 +126,8 @@ struct NCCLContextMap {
}
}
VLOG
(
1
)
<<
"init nccl rank:"
<<
rank
<<
", nranks:"
<<
nranks
VLOG
(
1
)
<<
"init nccl rank:"
<<
rank
<<
", nranks:"
<<
nranks
<<
", gpu_id:"
<<
gpu_id
<<
", dev_id:"
<<
order_
[
i
];
<<
", gpu_id:"
<<
gpu_id
<<
", dev_id:"
<<
order_
[
i
];
PADDLE_ENFORCE
(
cudaSetDevice
(
gpu_id
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaSetDevice
(
gpu_id
));
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclCommInitRank
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
platform
::
dynload
::
ncclCommInitRank
(
comms
.
get
()
+
i
,
nranks
,
*
nccl_id
,
rank
));
comms
.
get
()
+
i
,
nranks
,
*
nccl_id
,
rank
));
}
}
}
}
...
@@ -249,13 +249,13 @@ class NCCLCommunicator {
...
@@ -249,13 +249,13 @@ class NCCLCommunicator {
size_t
trainers_num
,
size_t
trainer_id
,
size_t
trainers_num
,
size_t
trainer_id
,
size_t
inter_trainers_num
,
size_t
inter_trainers_num
,
size_t
exter_trainers_num
)
{
size_t
exter_trainers_num
)
{
PADDLE_ENFORCE
(
trainers_num
==
inter_trainers_num
*
exter_trainers_num
,
PADDLE_ENFORCE
_EQ
(
trainers_num
,
inter_trainers_num
*
exter_trainers_num
,
"trainers_num:%llu != inter_trainers_num:%llu * "
"trainers_num:%llu != inter_trainers_num:%llu * "
"exter_trainers_num:%llu"
,
"exter_trainers_num:%llu"
,
trainers_num
,
inter_trainers_num
,
exter_trainers_num
);
trainers_num
,
inter_trainers_num
,
exter_trainers_num
);
PADDLE_ENFORCE
(
inter_trainers_num
>
1
,
"inter_trainers_num:%llu must > 1"
,
PADDLE_ENFORCE
_GT
(
inter_trainers_num
,
1
,
"inter_trainers_num:%llu must > 1"
,
inter_trainers_num
);
inter_trainers_num
);
int
inter_trainer_id
=
trainer_id
%
inter_trainers_num
;
int
inter_trainer_id
=
trainer_id
%
inter_trainers_num
;
for
(
size_t
i
=
0
;
i
<
inter_nccl_ids
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
inter_nccl_ids
.
size
();
i
++
)
{
...
...
paddle/fluid/platform/profiler.cu
浏览文件 @
75d15719
...
@@ -35,13 +35,13 @@ void DummyKernelAndEvent() {
...
@@ -35,13 +35,13 @@ void DummyKernelAndEvent() {
ForEachDevice
([](
int
d
)
{
ForEachDevice
([](
int
d
)
{
platform
::
SetDeviceId
(
d
);
platform
::
SetDeviceId
(
d
);
cudaStream_t
stream
;
cudaStream_t
stream
;
PADDLE_ENFORCE
(
cudaStreamCreate
(
&
stream
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaStreamCreate
(
&
stream
));
Mark
(
"_cuda_startup_"
);
Mark
(
"_cuda_startup_"
);
int
*
ptr
;
int
*
ptr
;
PADDLE_ENFORCE
(
cudaMalloc
(
&
ptr
,
sizeof
(
int
)));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaMalloc
(
&
ptr
,
sizeof
(
int
)));
DummyKernel
<<<
1
,
1
,
0
,
stream
>>>
(
ptr
);
DummyKernel
<<<
1
,
1
,
0
,
stream
>>>
(
ptr
);
PADDLE_ENFORCE
(
cudaStreamSynchronize
(
stream
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaStreamSynchronize
(
stream
));
PADDLE_ENFORCE
(
cudaFree
(
ptr
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaFree
(
ptr
));
});
});
}
}
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录