Unverified commit 57aabbab
Authored on Jul 01, 2021 by Yuang Liu; committed by GitHub on Jul 01, 2021

gradient scale (#33862)

Parent: 3fc56aa0
Showing 5 changed files with 119 additions and 0 deletions (+119 -0)
paddle/fluid/framework/distributed_strategy.proto                              (+11 -0)
python/paddle/distributed/fleet/base/distributed_strategy.py                   (+22 -0)
python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py   (+12 -0)
python/paddle/fluid/tests/unittests/CMakeLists.txt                             (+1 -0)
python/paddle/fluid/tests/unittests/test_fleet_gradient_scale.py               (+73 -0)
paddle/fluid/framework/distributed_strategy.proto

@@ -119,6 +119,16 @@ message ExecutionStrategy {
  optional bool use_thread_barrier = 4 [ default = false ];
}

message GradientScaleConfig {
  // Optional value ['avg', 'sum', 'customized']
  // If avg, loss@grad will be divided by the number of devices,
  // that is, the gradient will be accumulated and averaged among
  // multiple devices.
  // Else if sum, the gradient will be accumulated among multiple
  // devices.
  optional string scale_strategy = 1 [ default = 'avg' ];
}

message AsyncConfig {
  optional int32 k_steps = 1 [ default = -1 ];
  optional int32 max_merge_var_num = 2 [ default = 1 ];
  ...

@@ -195,6 +205,7 @@ message DistributedStrategy {
  optional TensorParallelConfig tensor_parallel_configs = 113;
  optional BuildStrategy build_strategy = 201;
  optional ExecutionStrategy execution_strategy = 202;
  optional GradientScaleConfig gradient_scale_configs = 203;
}

message DistributedJobInfo {
  ...
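As the new comments describe, 'avg' accumulates the gradient across devices and then divides by the device count, while 'sum' only accumulates. A minimal numpy sketch of the arithmetic difference (illustrative only, not Paddle code):

import numpy as np

# Per-device gradients for one parameter, as if computed on 4 devices.
grads = [np.array([1.0, 2.0]), np.array([3.0, 4.0]),
         np.array([5.0, 6.0]), np.array([7.0, 8.0])]

summed = np.sum(grads, axis=0)   # 'sum': accumulate only -> [16. 20.]
averaged = summed / len(grads)   # 'avg': accumulate, then average -> [4. 5.]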
python/paddle/distributed/fleet/base/distributed_strategy.py

@@ -254,6 +254,28 @@ class DistributedStrategy(object):
                getattr(self.strategy.build_strategy,
                        f.name).extend(getattr(strategy, f.name))

    @property
    def gradient_scale_configs(self):
        """
        Set the strategy of gradient scale

        Examples:

          .. code-block:: python

            import paddle.distributed.fleet as fleet
            strategy = fleet.DistributedStrategy()
            strategy.gradient_scale_configs = {'scale_strategy': 'avg'}

        Note that the strategy must be one of 'avg', 'sum' or 'customized'.
        """
        return get_msg_dict(self.strategy.gradient_scale_configs)

    @gradient_scale_configs.setter
    @is_strict_auto
    def gradient_scale_configs(self, config):
        check_configs_key(self.strategy.gradient_scale_configs, config,
                          'gradient_scale_configs')
        assign_configs_value(self.strategy.gradient_scale_configs, config)

    @property
    def a_sync(self):
        """
        ...
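The getter returns the config as a plain dict via get_msg_dict, so a value set through the setter can be read straight back. A short usage sketch based on the docstring example above (assumes a Paddle build that includes this commit):

import paddle.distributed.fleet as fleet

strategy = fleet.DistributedStrategy()
strategy.gradient_scale_configs = {'scale_strategy': 'sum'}
print(strategy.gradient_scale_configs)   # expected: {'scale_strategy': 'sum'}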
python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py

@@ -18,6 +18,7 @@ from paddle.fluid import compiler
from .meta_optimizer_base import MetaOptimizerBase
from ..base.private_helper_function import wait_server_ready
import logging
from paddle.static import BuildStrategy

__all__ = []
...

@@ -147,6 +148,17 @@ class GraphExecutionOptimizer(MetaOptimizerBase):
        local_build_strategy.nccl_comm_num = \
            dist_strategy.nccl_comm_num

        gradient_scale_configs = self.user_defined_strategy.gradient_scale_configs
        scale_strategys = {
            'avg': BuildStrategy.GradientScaleStrategy.CoeffNumDevice,
            'sum': BuildStrategy.GradientScaleStrategy.One,
            'customized': BuildStrategy.GradientScaleStrategy.Customized,
        }
        assert gradient_scale_configs['scale_strategy'] in scale_strategys, \
            "gradient_scale_configs.scale_strategy must be 'avg', 'sum' or 'customized'"
        local_build_strategy.gradient_scale_strategy = \
            scale_strategys[gradient_scale_configs['scale_strategy']]

        if self.user_defined_strategy.recompute == True:
            logging.warn(
                "set enable_sequential_execution=True since you have enable the recompute strategy"
                ...
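The optimizer resolves the user-facing string into a BuildStrategy.GradientScaleStrategy enum value and rejects unknown names before applying it to the local build strategy. A standalone sketch of the same lookup-and-validate pattern (resolve_scale_strategy is a hypothetical helper, not part of Paddle):

from paddle.static import BuildStrategy

def resolve_scale_strategy(name):
    # Mirrors the mapping added in this commit; hypothetical helper.
    table = {
        'avg': BuildStrategy.GradientScaleStrategy.CoeffNumDevice,
        'sum': BuildStrategy.GradientScaleStrategy.One,
        'customized': BuildStrategy.GradientScaleStrategy.Customized,
    }
    assert name in table, \
        "gradient_scale_configs.scale_strategy must be 'avg', 'sum' or 'customized'"
    return table[name]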
python/paddle/fluid/tests/unittests/CMakeLists.txt

@@ -107,6 +107,7 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32)
    LIST(REMOVE_ITEM TEST_OPS test_collective_wait)
    LIST(REMOVE_ITEM TEST_OPS test_memcpy_op)
    LIST(REMOVE_ITEM TEST_OPS test_raw_program_optimizer)
    LIST(REMOVE_ITEM TEST_OPS test_fleet_gradient_scale)
endif()
if(WIN32)
    ...
python/paddle/fluid/tests/unittests/test_fleet_gradient_scale.py (new file, mode 100644)

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import paddle
import paddle.fluid as fluid
import paddle.distributed.fleet as fleet
import numpy as np
import os


class TestGradientScale(unittest.TestCase):
    def setUp(self):
        os.environ["PADDLE_TRAINER_ID"] = "0"
        os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"

    def mlp(self, input_x, input_y, hid_dim=128, label_dim=2):
        fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim, activation='tanh')
        fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim, activation='tanh')
        prediction = paddle.static.nn.fc(
            x=[fc_2], size=label_dim, activation='softmax')
        cost = paddle.nn.functional.cross_entropy(
            input=prediction, label=input_y)
        avg_cost = paddle.mean(x=cost)
        return avg_cost

    def gen_data(self):
        return {
            "x": np.random.random(size=(128, 32)).astype('float32'),
            "y": np.random.randint(2, size=(128, 1)).astype('int64')
        }

    def test_single_gpu(self):
        paddle.enable_static()
        fleet.init(is_collective=True)
        main_program = paddle.static.Program()
        startup_program = paddle.static.Program()
        strategy = fleet.DistributedStrategy()
        strategy.gradient_scale_configs = {'scale_strategy': 'sum'}
        with fluid.program_guard(main_program, startup_program):
            with fluid.unique_name.guard():
                input_x = paddle.static.data(
                    name="x", shape=[None, 32], dtype='float32')
                input_y = paddle.static.data(
                    name="y", shape=[None, 1], dtype='int64')
                cost = self.mlp(input_x=input_x, input_y=input_y)
                output_name = cost.name
                optimizer = fleet.distributed_optimizer(
                    fluid.optimizer.Adam(), strategy)
                optimizer.minimize(cost)
        final_strategy = fleet._final_strategy()
        assert final_strategy.gradient_scale_configs['scale_strategy'] == 'sum'


if __name__ == "__main__":
    unittest.main()
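Since the proto default for scale_strategy is 'avg', the untouched default should round-trip the same way. A quick sanity check (a sketch, assuming the getter reports proto defaults):

import paddle.distributed.fleet as fleet

strategy = fleet.DistributedStrategy()
# No explicit config set; expected to reflect the proto default.
print(strategy.gradient_scale_configs)   # expected: {'scale_strategy': 'avg'}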