PaddlePaddle / Paddle
Commit e36f80c6 (unverified)
Authored on Apr 17, 2023 by Chitsing KUI; committed via GitHub on Apr 17, 2023
[Fused] controlled randomness for fused dropout add (#52903)
* add random control for fused dropout add
* add __init__
Parent: d19d2486

Showing 10 changed files with 220 additions and 19 deletions (+220 −19)
paddle/phi/api/yaml/fused_backward.yaml (+1 −1)
paddle/phi/api/yaml/fused_ops.yaml (+3 −1)
paddle/phi/infermeta/binary.cc (+0 −5)
paddle/phi/infermeta/binary.h (+0 −5)
paddle/phi/kernels/funcs/dropout_impl.cu.h (+2 −0)
paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu (+11 −2)
python/paddle/distributed/auto_parallel/operators/__init__.py (+1 −0)
python/paddle/distributed/auto_parallel/operators/dist_fused_dropout_add.py (+191 −0)
python/paddle/distributed/passes/auto_parallel_recompute.py (+9 −4)
python/paddle/incubate/nn/functional/fused_dropout_add.py (+2 −1)
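For orientation before the per-file diffs: the user-facing entry point touched here is paddle.incubate.nn.functional.fused_dropout_add, which computes dropout(x) + y in one fused GPU kernel. A minimal usage sketch (assuming a CUDA build of Paddle that includes this commit; the fused kernel is GPU-only):

    import paddle
    from paddle.incubate.nn.functional import fused_dropout_add

    paddle.seed(2023)  # global seed; the new seed_tensor input stays None here
    x = paddle.randn([4, 8])
    y = paddle.randn([4, 8])
    out = fused_dropout_add(x, y, p=0.5, training=True)  # out = dropout(x) + y

The new optional seed_tensor input is not part of this Python signature; it is wired in behind the scenes by the auto-parallel machinery in the files below.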
paddle/phi/api/yaml/fused_backward.yaml

@@ -5,7 +5,7 @@
 # otherwise the operator only could be used in static mode.

 - backward_op : fused_dropout_add_grad
-  forward : fused_dropout_add (Tensor x, Tensor y, Scalar p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(seed_offset)
+  forward : fused_dropout_add (Tensor x, Tensor y, Tensor seed_tensor, Scalar p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(seed_offset)
   args : (Tensor seed_offset, Tensor out_grad, Scalar p, bool is_test, str mode, bool fix_seed)
   output : Tensor(x_grad), Tensor(y_grad)
   infer_meta :
paddle/phi/api/yaml/fused_ops.yaml

@@ -34,10 +34,12 @@
   optional : bias, x_max

 - op : fused_dropout_add
-  args : (Tensor x, Tensor y, Scalar p, bool is_test, str mode, int seed, bool fix_seed)
+  args : (Tensor x, Tensor y, Tensor seed_tensor, Scalar p, bool is_test, str mode, int seed = 0, bool fix_seed = false)
+  optional : seed_tensor
   output : Tensor(out), Tensor(seed_offset)
   infer_meta :
     func : FusedDropoutAddInferMeta
     param : [x, y]
   kernel :
     func : fused_dropout_add
     data_type : x
paddle/phi/infermeta/binary.cc

@@ -1282,11 +1282,6 @@ void FillDiagonalTensorInferMeta(const MetaTensor& x,
 void FusedDropoutAddInferMeta(const MetaTensor& x,
                               const MetaTensor& y,
-                              const Scalar& p,
-                              bool is_test,
-                              const std::string& mode,
-                              int seed,
-                              bool fix_seed,
                               MetaTensor* out,
                               MetaTensor* seed_offset) {
   out->share_meta(x);
paddle/phi/infermeta/binary.h

@@ -224,11 +224,6 @@ void FillDiagonalTensorInferMeta(const MetaTensor& x,
 void FusedDropoutAddInferMeta(const MetaTensor& x,
                               const MetaTensor& y,
-                              const Scalar& p,
-                              bool is_test,
-                              const std::string& mode,
-                              int seed,
-                              bool fix_seed,
                               MetaTensor* out,
                               MetaTensor* seed_offset);
paddle/phi/kernels/funcs/dropout_impl.cu.h

@@ -408,6 +408,8 @@ void DropoutFwGPUKernelDriver(
           main_offset);
 #undef PD_DROPOUT_KERNEL_NAME
     }
+    VLOG(4) << "Dropout seed: " << seed << ", offset: " << offset
+            << ", seed_data:" << seed_data;
   } else {
     if (upscale_in_train) {
       // y = x
paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu

@@ -139,6 +139,7 @@ template <typename T, typename Context>
 void FusedDropoutAddKernel(const Context& dev_ctx,
                            const DenseTensor& x,
                            const DenseTensor& y,
+                           const paddle::optional<DenseTensor>& seed_tensor,
                            const Scalar& p,
                            bool is_test,
                            const std::string& mode,

@@ -168,11 +169,19 @@ void FusedDropoutAddKernel(const Context& dev_ctx,
     size_t block_size = random_prop[1];
     size_t offset = random_prop[2];
     size_t main_offset = random_prop[3];
-    funcs::GetSeedDataAndIncrement(
-        dev_ctx, nullptr, fix_seed, seed, offset, &seed_data, &increment);
+    funcs::GetSeedDataAndIncrement(dev_ctx,
+                                   seed_tensor.get_ptr(),
+                                   fix_seed,
+                                   seed,
+                                   offset,
+                                   &seed_data,
+                                   &increment);
+    seed_offset_data[0] = static_cast<int64_t>(seed_data);
+    seed_offset_data[1] = static_cast<int64_t>(increment);
+    VLOG(4) << "FusedDropoutAdd seed: " << seed << ", offset: " << offset
+            << ", seed_data:" << seed_data;
     auto dst_functor =
         NoMaskFwFunctor<T, float>(1.0f - dropout_rate, upscale_in_train);
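The kernel change has two parts: the seed is now taken from the optional seed_tensor when present, and the (seed, increment) pair actually used is written into seed_offset so the backward kernel can regenerate the identical mask. A toy sketch of that replay contract, with NumPy standing in for the counter-based Philox stream the CUDA kernel uses (hypothetical helper, illustration only):

    import numpy as np

    def toy_dropout_mask(seed, offset, n, p):
        # same (seed, offset) pair -> same mask, which is the property
        # the recorded seed_offset output relies on
        rng = np.random.default_rng([seed, offset])
        return (rng.random(n) >= p).astype(np.float32)

    fwd_mask = toy_dropout_mask(42, 0, 8, 0.5)
    bwd_mask = toy_dropout_mask(42, 0, 8, 0.5)  # replayed from seed_offset
    assert (fwd_mask == bwd_mask).all()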
python/paddle/distributed/auto_parallel/operators/__init__.py

@@ -32,6 +32,7 @@ from . import dist_pnorm
 from . import dist_slice
 from . import dist_fused_feedforward
 from . import dist_fused_attention
+from . import dist_fused_dropout_add
 from . import dist_reduce_sum_p
 from . import dist_shape
 from . import dist_assign
python/paddle/distributed/auto_parallel/operators/dist_fused_dropout_add.py (new file, 0 → 100644)

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License

import logging

import paddle
from paddle.framework import core
from paddle.utils import unique_name

from ...utils.log_utils import get_logger

_logger = get_logger(logging.INFO)
from ..random import determinate_rng, is_enable_auto_rand_ctrl
from ..utils import (
    naive_set_dist_op_attr_for_program_by_mesh_and_mapping,
    set_var_dist_attr,
)
from .common import (
    DistributedOperatorImplContainer,
    register_distributed_operator_impl,
    register_distributed_operator_impl_container,
)
from .dist_eltwise import DistributedDefaultImpl0, DistributedElementwiseImpl0


class DistributedDropout(DistributedOperatorImplContainer):
    def __init__(self, op_type):
        super().__init__(op_type)


register_distributed_operator_impl_container(
    DistributedDropout("fused_dropout_add")
)


# Dist Dropout with Random Control
# Dropout re-uses the compatibility and cost functions of elementwise
class DistributedDropoutImpl0(DistributedElementwiseImpl0):
    def __init__(self, name):
        super().__init__(name)
        self._forward_implemented = True
        self._backward_implemented = True

    def is_input_compatible(self, dist_op):
        return True

    def is_output_compatible(self, dist_op):
        return True

    def is_auto_compatible(self, dist_op):
        return True

    @staticmethod
    def forward(ctx, *args, **kwargs):
        dist_op_context = ctx.dist_op_context
        main_block = dist_op_context.work_block
        startup_block = dist_op_context.startup_block
        src_op = dist_op_context.cur_src_op
        rank_id = dist_op_context.rank_id
        op_dist_attr = ctx.get_op_dist_attr_for_program(src_op)

        if is_enable_auto_rand_ctrl() and not op_dist_attr.is_recompute:
            assert (
                op_dist_attr is not None
            ), f"forward op [{str(src_op)}] doesn't have a dist attribute!"

            assert 'seed_tensor' in kwargs, "input [{}] is not given".format(
                'seed_tensor'
            )

            if (
                src_op.has_attr("fix_seed")
                and src_op.attr("fix_seed")
                and src_op.has_attr("seed")
                and src_op.attr("seed")
            ):
                _logger.info(
                    "Auto Parallel Random Control Skipped Since manual seed is set by user: {}".format(
                        src_op
                    )
                )
            elif rank_id not in op_dist_attr.process_mesh.process_ids:
                pass
            elif (
                len(kwargs['seed_tensor']) > 0
                or len(src_op.input("seed_tensor")) > 0
            ):
                seed_var_name = kwargs['seed_tensor'][0]
                if seed_var_name.startswith('rc_seed'):
                    pre_op = main_block.ops[-1]
                    assert (
                        pre_op.type == "seed"
                        and len(pre_op.attr("rng_name")) == 0
                    ), f"found exception op {str(pre_op)}"

                    # determinate rng
                    X_var = main_block._var_recursive(kwargs['x'][0])
                    X_dims_mapping = op_dist_attr.get_input_dims_mapping(
                        X_var.name
                    )
                    process_mesh = op_dist_attr.process_mesh
                    rng_name = determinate_rng(
                        rank_id, X_dims_mapping, process_mesh
                    )
                    # make recompute seed under control
                    pre_op._set_attr("rng_name", rng_name)
                    pre_op._set_attr("deterministic", True)
                    pre_op._set_attr("force_cpu", True)
                else:
                    _logger.info(
                        "Auto Parallel Random Control Skipped Since manual seed is set by user: {}".format(
                            src_op
                        )
                    )
            else:
                # determinate rng
                X_var = main_block._var_recursive(kwargs['x'][0])
                X_dims_mapping = op_dist_attr.get_input_dims_mapping(X_var.name)
                process_mesh = op_dist_attr.process_mesh
                rng_name = determinate_rng(
                    rank_id, X_dims_mapping, process_mesh
                )
                assert rng_name is not None and rng_name != ""

                # insert seed op
                seed_var = main_block.create_var(
                    name=unique_name.generate_with_ignorable_key(
                        ".".join(["tensor_parallel_seed", 'tmp'])
                    ),
                    dtype=paddle.int32,
                    type=core.VarDesc.VarType.LOD_TENSOR,
                    persistable=False,
                    stop_gradient=False,
                )

                # set new seed_var's dist_attr
                seed_var_dims_mapping = [-1]
                seed_var_dist_attr = set_var_dist_attr(
                    ctx, seed_var, seed_var_dims_mapping, process_mesh
                )

                # adopt for recompute
                # force_cpu to reduce sync copy from CPU->GPU->CPU, and reduce pipeline hang
                seed_op = main_block.append_op(
                    type='seed',
                    outputs={'Out': seed_var},
                    attrs={
                        'deterministic': True,
                        'rng_name': rng_name,
                        'force_cpu': True,
                    },
                )
                seed_op._set_attr('op_namescope', 'auto_tensor_parallel_seed')
                # set new seed op's dist_attr
                naive_set_dist_op_attr_for_program_by_mesh_and_mapping(
                    seed_op, process_mesh, seed_var_dims_mapping, ctx
                )

                # modify dropout op
                src_op.desc.set_input("seed_tensor", [seed_var.name])
                src_op._remove_attr("fix_seed")
                src_op._remove_attr("seed")
                op_dist_attr.set_input_dist_attr(
                    seed_var.name, seed_var_dist_attr
                )
                kwargs['seed_tensor'] = [seed_var.name]

        DistributedDefaultImpl0.forward(ctx, *args, **kwargs)

    @staticmethod
    def backward(ctx, *args, **kwargs):
        # dropout backward is deterministic by mask and does not need random state control
        DistributedDefaultImpl0.backward(ctx, *args, **kwargs)


register_distributed_operator_impl(
    "fused_dropout_add", DistributedDropoutImpl0("random_control")
)
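The pivotal call in this new file is determinate_rng(rank_id, X_dims_mapping, process_mesh): ranks that hold different shards of x must draw different dropout masks, while replays of the same op (e.g. under recompute) must reproduce the same mask. A toy sketch of that stream-naming idea (hypothetical helper, not Paddle's actual determinate_rng; a faithful version would also map data-parallel replicas of the same shard to one name):

    def toy_determinate_rng(rank_id, dims_mapping, mesh_shape):
        # one named stream per (mesh, sharding, rank) triple: deterministic
        # to replay, but distinct across shards of the same tensor
        mesh = "x".join(map(str, mesh_shape))
        dims = ",".join(map(str, dims_mapping))
        return f"mesh_{mesh}_dims_{dims}_rank_{rank_id}"

    print(toy_determinate_rng(0, [-1, 0], (2, 2)))  # mesh_2x2_dims_-1,0_rank_0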
python/paddle/distributed/passes/auto_parallel_recompute.py

@@ -115,7 +115,7 @@ class RecomputeState(ProgramStats):
         a seed op before it to guarantee that two dropout ops have the same outputs.
         """
         op_types = [op.type for op in self.ops]
-        if "dropout" not in op_types:
+        if "dropout" not in op_types and "fused_dropout_add" not in op_types:
             return

         op_idx = 0

@@ -127,10 +127,15 @@ class RecomputeState(ProgramStats):
                 self._reserved_vars.extend(cur_op.output_arg_names)
                 op_idx += 1
                 continue
-            if cur_op.type != "dropout":
+            if cur_op.type not in ["dropout", "fused_dropout_add"]:
                 op_idx += 1
                 continue
-            if cur_op.input("Seed") is not None and len(cur_op.input("Seed")):
+            seed_tensor_name = (
+                "seed_tensor" if cur_op.type == "fused_dropout_add" else "Seed"
+            )
+            if cur_op.input(seed_tensor_name) is not None and len(
+                cur_op.input(seed_tensor_name)
+            ):
                 op_idx += 1
                 continue

@@ -179,7 +184,7 @@ class RecomputeState(ProgramStats):
             # modify dropout op's desc
             self.ops.insert(op_idx, seed_op)
-            cur_op.desc.set_input("Seed", [var_unique_name])
+            cur_op.desc.set_input(seed_tensor_name, [var_unique_name])
             cur_op._remove_attr("fix_seed")
             cur_op._remove_attr("seed")
             cur_op_dist_attr.set_input_dist_attr(
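The recompute pass needs the shared seed input because a dropout op's body runs twice: once in the original forward and again when activations are recomputed for the backward pass, and both runs must produce the same mask. A pure-Python toy of the invariant the inserted seed op guarantees (illustration only):

    import random

    def toy_dropout(xs, p, seed):
        rng = random.Random(seed)  # the inserted seed op plays this role
        return [0.0 if rng.random() < p else v / (1 - p) for v in xs]

    xs = [1.0, 2.0, 3.0, 4.0]
    forward = toy_dropout(xs, 0.5, seed=7)
    recomputed = toy_dropout(xs, 0.5, seed=7)  # backward replays the same seed
    assert forward == recomputed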
python/paddle/incubate/nn/functional/fused_dropout_add.py

@@ -79,6 +79,7 @@ def fused_dropout_add(
         out, seed_offset = _C_ops.fused_dropout_add(
             x,
             y,
+            None,
             p,
             not training,
             mode,

@@ -109,7 +110,7 @@ def fused_dropout_add(
     helper.append_op(
         type='fused_dropout_add',
-        inputs={'x': x, 'y': y},
+        inputs={'x': x, 'y': y, 'seed_tensor': None},
         outputs={'out': [out], 'seed_offset': [seed_offset]},
         attrs=attrs,
     )
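In the static-graph branch, helper.append_op now declares the seed_tensor input (as None) so the op desc matches the new signature, giving the auto-parallel pass a slot it can later rewire to a real seed op's output. A minimal static-mode sketch of building a program that contains the op (again assuming a CUDA build with this commit):

    import paddle
    from paddle.incubate.nn.functional import fused_dropout_add

    paddle.enable_static()
    main = paddle.static.Program()
    with paddle.static.program_guard(main):
        x = paddle.static.data("x", [2, 4], "float32")
        y = paddle.static.data("y", [2, 4], "float32")
        out = fused_dropout_add(x, y, p=0.5, training=True)
    # the op desc now carries an empty seed_tensor slot that
    # dist_fused_dropout_add.py can point at a seed op's output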