Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
7edfac9e
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7edfac9e
编写于
2月 09, 2023
作者:
W
wangzhen38
提交者:
GitHub
2月 09, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[rm fluid] for the non distribution (#50313)
上级
d93c63a0
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
62 addition
and
55 deletion
+62
-55
python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py
.../fleet/parameter_server/distribute_transpiler/__init__.py
+19
-17
python/paddle/fluid/incubate/fleet/parameter_server/ir/heter_trainer_pass.py
.../incubate/fleet/parameter_server/ir/heter_trainer_pass.py
+3
-4
python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py
.../fluid/incubate/fleet/parameter_server/ir/pserver_pass.py
+1
-2
python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py
...paddle/fluid/incubate/fleet/parameter_server/ir/public.py
+24
-18
python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py
.../fluid/incubate/fleet/parameter_server/ir/trainer_pass.py
+5
-5
python/paddle/fluid/incubate/fleet/parameter_server/ir/vars_metatools.py
...luid/incubate/fleet/parameter_server/ir/vars_metatools.py
+2
-2
python/paddle/fluid/incubate/fleet/parameter_server/pslib/__init__.py
...e/fluid/incubate/fleet/parameter_server/pslib/__init__.py
+3
-4
python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py
...ncubate/fleet/parameter_server/pslib/optimizer_factory.py
+3
-3
python/paddle/framework/__init__.py
python/paddle/framework/__init__.py
+1
-0
python/paddle/static/__init__.py
python/paddle/static/__init__.py
+1
-0
未找到文件。
python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py
浏览文件 @
7edfac9e
...
...
@@ -12,20 +12,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Convert the
fluid
program to distributed data-parallelism programs.
Convert the
static
program to distributed data-parallelism programs.
"""
import
os
import
sys
import
warnings
from
paddle
import
fluid
from
paddle.fluid
import
core
from
paddle.fluid.framework
import
default_main_program
from
paddle.fluid.framework
import
default_startup_program
from
paddle.fluid.framework
import
Program
import
paddle
from
paddle.framework
import
core
from
paddle.static
import
(
default_main_program
,
default_startup_program
,
Program
,
Executor
,
)
from
paddle.fluid.compiler
import
CompiledProgram
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.parallel_executor
import
ParallelExecutor
from
paddle.fluid.optimizer
import
Optimizer
...
...
@@ -274,7 +276,7 @@ class FleetTranspiler(Fleet):
)
)
fluid
.
io
.
load_vars
(
paddle
.
static
.
load_vars
(
self
.
_executor
,
main_program
=
self
.
main_program
,
dirname
=
model_dir
,
...
...
@@ -430,8 +432,8 @@ class FleetTranspiler(Fleet):
)
# Todo(MrChengmo): support recv&save GPU-Kernel for ps-gpu model save
if
not
isinstance
(
executor
.
place
,
fluid
.
CPUPlace
):
save_executor
=
Executor
(
fluid
.
CPUPlace
())
if
not
isinstance
(
executor
.
place
,
paddle
.
CPUPlace
):
save_executor
=
Executor
(
paddle
.
CPUPlace
())
else
:
save_executor
=
executor
...
...
@@ -440,7 +442,7 @@ class FleetTranspiler(Fleet):
raise
TypeError
(
"in fleet.save_inference_model() function, main_program must be as Program type, CompiledProgram is not allowed"
)
fluid
.
io
.
save_inference_model
(
paddle
.
static
.
save_inference_model
(
dirname
,
feeded_var_names
,
target_vars
,
...
...
@@ -451,7 +453,7 @@ class FleetTranspiler(Fleet):
export_for_deployment
,
)
else
:
fluid
.
io
.
save_inference_model
(
paddle
.
static
.
save_inference_model
(
dirname
,
feeded_var_names
,
target_vars
,
...
...
@@ -733,7 +735,7 @@ class FleetTranspiler(Fleet):
)
)
fluid
.
io
.
save_vars
(
paddle
.
static
.
save_vars
(
executor
,
main_program
=
main_program
,
dirname
=
dirname
,
...
...
@@ -766,8 +768,8 @@ class FleetTranspiler(Fleet):
"in fleet.save_persistables() function, executor must be as Executor type"
)
# Todo(MrChengmo): support recv&save GPU-Kernel for ps-gpu model save
if
not
isinstance
(
executor
.
place
,
fluid
.
CPUPlace
):
save_executor
=
Executor
(
fluid
.
CPUPlace
())
if
not
isinstance
(
executor
.
place
,
paddle
.
CPUPlace
):
save_executor
=
Executor
(
paddle
.
CPUPlace
())
else
:
save_executor
=
executor
...
...
@@ -894,8 +896,8 @@ class ParameterServerOptimizer(DistributedOptimizer):
return
_main
,
_startup
def
_build_pserver_programs
(
self
,
compiled_config
):
_main
=
fluid
.
Program
()
_startup
=
fluid
.
Program
()
_main
=
paddle
.
static
.
Program
()
_startup
=
paddle
.
static
.
Program
()
if
not
compiled_config
.
is_geo_mode
():
_main
=
server
.
add_listen_and_serv_pass
(
_main
,
compiled_config
)
...
...
python/paddle/fluid/incubate/fleet/parameter_server/ir/heter_trainer_pass.py
浏览文件 @
7edfac9e
...
...
@@ -14,10 +14,9 @@
import
warnings
import
paddle.f
luid
.core
as
core
import
paddle
.fluid.framework
as
framework
import
paddle.f
ramework
.core
as
core
import
paddle
from
paddle.fluid.transpiler.details.program_utils
import
delete_ops
from
paddle.fluid.incubate.fleet.parameter_server.ir.trainer_pass
import
(
find_heter_ops
,
)
...
...
@@ -60,7 +59,7 @@ def split_heter_worker_ops_pass(program, config, stage_id, device):
program_block_ops
=
union_forward_gradient_op
(
program_block_ops
)
block_vars_detail
=
find_block_joints
(
program
,
program_block_ops
,
heter_ops
)
heter_program
=
framework
.
Program
()
heter_program
=
paddle
.
static
.
Program
()
create_heter_program
(
program
,
config
,
...
...
python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py
浏览文件 @
7edfac9e
...
...
@@ -14,8 +14,7 @@
import
collections
from
paddle.fluid
import
core
from
paddle.fluid.framework
import
Block
from
paddle.framework
import
core
,
Block
from
paddle.fluid.incubate.fleet.parameter_server.ir.public
import
(
_get_optimize_ops
,
...
...
python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py
浏览文件 @
7edfac9e
...
...
@@ -14,15 +14,13 @@
from
functools
import
reduce
import
paddle
import
collections
import
math
import
os
import
warnings
import
logging
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid.core
import
CommContext
import
paddle.fluid.framework
as
framework
from
paddle.framework
import
core
from
paddle.fluid.incubate.fleet.parameter_server.mode
import
DistributedMode
from
paddle.fluid.incubate.fleet.parameter_server.ir
import
vars_metatools
from
paddle.fluid.incubate.fleet.parameter_server.ir.ps_dispatcher
import
(
...
...
@@ -415,7 +413,7 @@ class CompileTimeStrategy:
trainer_id
=
self
.
get_role_id
()
aggregate
=
True
ctx
=
CommContext
(
ctx
=
core
.
CommContext
(
name
,
names
,
eps
,
...
...
@@ -493,7 +491,7 @@ class CompileTimeStrategy:
is_distributed
,
)
ctx
=
CommContext
(
ctx
=
core
.
CommContext
(
param_ctx
.
var_name
(),
param_ctx
.
split_varnames
(),
param_ctx
.
split_endpoints
(),
...
...
@@ -659,7 +657,7 @@ class CompileTimeStrategy:
]
var_numel
=
reduce
(
lambda
x
,
y
:
x
*
y
,
var
.
shape
[
1
:])
sparse_ctx
=
CommContext
(
sparse_ctx
=
core
.
CommContext
(
grad_name
,
[
grad_name
],
[
"127.0.0.1:6071"
],
...
...
@@ -714,7 +712,7 @@ class CompileTimeStrategy:
grad_name
=
"Dense@Grad"
trainer_id
=
self
.
get_role_id
()
aggregate
=
True
dense_ctx
=
CommContext
(
dense_ctx
=
core
.
CommContext
(
grad_name
,
[
grad_name
],
[
"127.0.0.1:6071"
],
...
...
@@ -742,7 +740,7 @@ class CompileTimeStrategy:
var_numel
=
reduce
(
lambda
x
,
y
:
x
*
y
,
var
.
shape
)
grad_name
=
origin_varname
aggregate
=
True
dense_ctx
=
CommContext
(
dense_ctx
=
core
.
CommContext
(
grad_name
,
[
grad_name
],
[
"127.0.0.1:6071"
],
...
...
@@ -809,7 +807,7 @@ class CompileTimeStrategy:
shape
=
list
(
var
.
shape
)
shape
[
0
]
=
0
if
is_distributed
else
shape
[
0
]
sparse_ctx
=
CommContext
(
sparse_ctx
=
core
.
CommContext
(
grad_name
,
splited_varname
,
ep_list
,
...
...
@@ -901,7 +899,7 @@ class CompileTimeStrategy:
endpoints
=
self
.
get_ps_endpoints
()
sections
=
[
1
]
*
len
(
endpoints
)
names
=
[
name
]
*
len
(
endpoints
)
ctx
=
CommContext
(
ctx
=
core
.
CommContext
(
name
,
names
,
endpoints
,
...
...
@@ -1417,7 +1415,7 @@ def _get_lr_sheduler_program(lr_sheduler, lr_param_dict, lr_decay_steps):
NaturalExpDecay
,
InverseTimeDecay
,
)
from
paddle.
fluid.layers
.learning_rate_scheduler
import
(
from
paddle.
static
.learning_rate_scheduler
import
(
exponential_decay
,
noam_decay
,
piecewise_decay
,
...
...
@@ -1425,12 +1423,14 @@ def _get_lr_sheduler_program(lr_sheduler, lr_param_dict, lr_decay_steps):
inverse_time_decay
,
)
decay_main_program
=
fluid
.
framework
.
Program
()
decay_startup_program
=
fluid
.
framework
.
Program
()
decay_main_program
=
paddle
.
static
.
Program
()
decay_startup_program
=
paddle
.
static
.
Program
()
lr_name
=
""
if
isinstance
(
lr_sheduler
,
ExponentialDecay
):
with
fluid
.
program_guard
(
decay_main_program
,
decay_startup_program
):
with
paddle
.
static
.
program_guard
(
decay_main_program
,
decay_startup_program
):
lr
=
exponential_decay
(
1.0
,
lr_decay_steps
,
lr_sheduler
.
gamma
,
True
)
lr_name
=
lr
.
name
logging
.
warn
(
...
...
@@ -1441,7 +1441,9 @@ def _get_lr_sheduler_program(lr_sheduler, lr_param_dict, lr_decay_steps):
%
lr_decay_steps
)
elif
isinstance
(
lr_sheduler
,
NoamDecay
):
with
fluid
.
program_guard
(
decay_main_program
,
decay_startup_program
):
with
paddle
.
static
.
program_guard
(
decay_main_program
,
decay_startup_program
):
lr
=
noam_decay
(
lr_sheduler
.
d_model
,
lr_sheduler
.
warmup_steps
,
1.0
)
lr_name
=
lr
.
name
logging
.
warn
(
...
...
@@ -1449,7 +1451,9 @@ def _get_lr_sheduler_program(lr_sheduler, lr_param_dict, lr_decay_steps):
%
lr_sheduler
.
warmup_steps
)
elif
isinstance
(
lr_sheduler
,
NaturalExpDecay
):
with
fluid
.
program_guard
(
decay_main_program
,
decay_startup_program
):
with
paddle
.
static
.
program_guard
(
decay_main_program
,
decay_startup_program
):
lr
=
natural_exp_decay
(
1.0
,
lr_decay_steps
,
lr_sheduler
.
gamma
,
True
)
lr_name
=
lr
.
name
logging
.
warn
(
...
...
@@ -1460,7 +1464,9 @@ def _get_lr_sheduler_program(lr_sheduler, lr_param_dict, lr_decay_steps):
%
lr_decay_steps
)
elif
isinstance
(
lr_sheduler
,
InverseTimeDecay
):
with
fluid
.
program_guard
(
decay_main_program
,
decay_startup_program
):
with
paddle
.
static
.
program_guard
(
decay_main_program
,
decay_startup_program
):
lr
=
inverse_time_decay
(
1.0
,
lr_decay_steps
,
lr_sheduler
.
gamma
,
True
)
...
...
python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py
浏览文件 @
7edfac9e
...
...
@@ -19,9 +19,9 @@ import warnings
import
math
from
functools
import
reduce
import
paddle
.fluid
as
fluid
import
paddle.fluid.core
as
core
import
paddle.f
luid.f
ramework
as
framework
import
paddle
from
paddle.framework
import
core
import
paddle.framework
as
framework
from
paddle.fluid.transpiler.details.program_utils
import
delete_ops
from
paddle.fluid.incubate.fleet.parameter_server.ir.public
import
(
...
...
@@ -962,7 +962,7 @@ def find_heter_ops(program, default_device="cpu"):
if
len
(
heter_ops
)
==
0
:
warnings
.
warn
(
"No heterogeneous OP was found in your program , "
" please using
fluid
.device_guard() to run OPs on different device."
" please using
paddle.static
.device_guard() to run OPs on different device."
)
total_heter_ops
=
0
...
...
@@ -1824,7 +1824,7 @@ def screen_persistables(program, var_list):
else
:
var
=
program
.
global_block
().
vars
[
var_name
]
if
fluid
.
io
.
is_persistable
(
var
):
if
paddle
.
static
.
is_persistable
(
var
):
need_remove
.
append
(
var_name
)
for
var_name
in
need_remove
:
...
...
python/paddle/fluid/incubate/fleet/parameter_server/ir/vars_metatools.py
浏览文件 @
7edfac9e
...
...
@@ -13,8 +13,8 @@
# limitations under the License.
from
functools
import
reduce
from
paddle.f
luid.framework
import
Variable
from
paddle.f
luid
import
core
from
paddle.f
ramework.io
import
Variable
from
paddle.f
ramework
import
core
dtype_to_size
=
{
core
.
VarDesc
.
VarType
.
FP16
:
2
,
...
...
python/paddle/fluid/incubate/fleet/parameter_server/pslib/__init__.py
浏览文件 @
7edfac9e
...
...
@@ -16,8 +16,7 @@ import os
import
sys
from
.optimizer_factory
import
*
from
google.protobuf
import
text_format
import
paddle.fluid
as
fluid
from
paddle.fluid.framework
import
Program
from
paddle.framework
import
core
from
paddle.fluid.incubate.fleet.base.fleet_base
import
Fleet
from
paddle.fluid.incubate.fleet.base.mode
import
Mode
...
...
@@ -44,10 +43,10 @@ class PSLib(Fleet):
if
role_maker
is
None
:
role_maker
=
MPISymetricRoleMaker
()
super
().
init
(
role_maker
)
self
.
_fleet_ptr
=
fluid
.
core
.
Fleet
()
self
.
_fleet_ptr
=
core
.
Fleet
()
self
.
_heter_ptr
=
None
if
isinstance
(
role_maker
,
HeterRoleMaker
):
self
.
_heter_ptr
=
fluid
.
core
.
Heter
()
self
.
_heter_ptr
=
core
.
Heter
()
def
_set_client_communication_config
(
self
,
request_timeout_ms
,
connect_timeout_ms
,
max_retry
...
...
python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py
浏览文件 @
7edfac9e
...
...
@@ -14,8 +14,8 @@
"""Optimizer Factory."""
__all__
=
[
"DistributedAdam"
,
"FLEET_GLOBAL_DICT"
]
import
paddle
.fluid
as
fluid
from
paddle.f
luid
import
core
import
paddle
from
paddle.f
ramework
import
core
from
paddle.fluid.distribute_lookup_table
import
find_distributed_lookup_table
from
paddle.fluid.distribute_lookup_table
import
(
find_distributed_lookup_table_inputs
,
...
...
@@ -504,7 +504,7 @@ class DistributedAdam(DistributedOptimizerImplBase):
prog_id
=
str
(
id
(
loss
.
block
.
program
))
# param_grads of program
params_grads
=
sorted
(
fluid
.
backward
.
append_backward
(
loss
,
parameters
,
no_grad_set
),
paddle
.
static
.
append_backward
(
loss
,
parameters
,
no_grad_set
),
key
=
lambda
x
:
x
[
0
].
name
,
)
...
...
python/paddle/framework/__init__.py
浏览文件 @
7edfac9e
...
...
@@ -61,6 +61,7 @@ from ..fluid.framework import (
OpProtoHolder
,
)
# noqa: F401
from
..fluid.framework
import
_dygraph_tracer
# noqa: F401
from
..fluid.framework
import
generate_control_dev_var_name
# noqa: F401
from
..fluid.layer_helper
import
LayerHelper
# noqa: F401
from
..fluid.framework
import
in_dygraph_mode
# noqa: F401
...
...
python/paddle/static/__init__.py
浏览文件 @
7edfac9e
...
...
@@ -76,6 +76,7 @@ from ..fluid.io import batch # noqa: F401
from
..fluid.contrib.layers
import
ctr_metric_bundle
# noqa: F401
from
..fluid.layers
import
exponential_decay
# noqa: F401
from
..fluid.layers
import
learning_rate_scheduler
# noqa: F401
from
.nn.metric
import
auc
# noqa: F401
from
.nn.metric
import
accuracy
# noqa: F401
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录