From 5bbfca15d6b9d5945c766c76709d56cdf36e6085 Mon Sep 17 00:00:00 2001 From: ziyoujiyi <73728031+ziyoujiyi@users.noreply.github.com> Date: Tue, 20 Sep 2022 15:17:39 +0800 Subject: [PATCH] Fl ps (#46258) * back fl * delete ssl cert * . * make warning * . * unittest paral degree * solve unittest * heter & multi cloud comm ready * . * . * fl-ps v1.0 * . * support N + N mode * . * . * . * . * delete print * . * . * . * . * fix bug * . * . * fl-ps with coordinator ready * merge dev * update message parse only * update fl client scheduler * fix bug * update multithreads sync * fix ci errors * update role_maker.py * update role_maker.py * fix ci error: windows py import error * fix ci error: windows py import error * fix windows ci pylib import error * add dump fields & params * try to fix windows import fleet error * fix ps FLAGS error * fix logging risk * fix logging possible risk * write trainer_desc file * support split sparse params in local & remote * fix import paddle.fluid.core.PSGPU * fix import paddle.fluid.core.PSGPU * add remote_sparse & local_sparse config * fix unittest * fix test_dist_fleet_geo table error * fix PADDLE_ENFORCE error * fix other's pr conflict * forbidden ssd table * . 
* recover ssd table code * recover file mode * debug auc 0.5 * adapt for nn fl-ps * adapt for nn fl-ps * add learning_rate_0 initializer op * recover ssd table * modify file mode * flps del fake-init op --- python/paddle/distributed/passes/ps_trainer_pass.py | 2 ++ python/paddle/distributed/ps/utils/ps_program_builder.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/python/paddle/distributed/passes/ps_trainer_pass.py b/python/paddle/distributed/passes/ps_trainer_pass.py index 982d3f61e6e..4ca11a02c3e 100755 --- a/python/paddle/distributed/passes/ps_trainer_pass.py +++ b/python/paddle/distributed/passes/ps_trainer_pass.py @@ -617,6 +617,8 @@ class DeleteExtraOptimizerPass(PassBase): for var in remote_optimize_vars: if var in local_optimize_vars: continue + if 'learning_rate_0' == var: + continue if var not in remote_optimize_op_role_vars: optimize_need_delete_vars.append(var) need_delete_optimize_vars = list(set(optimize_need_delete_vars)) diff --git a/python/paddle/distributed/ps/utils/ps_program_builder.py b/python/paddle/distributed/ps/utils/ps_program_builder.py index 53771b05cbf..0bd870ffee5 100755 --- a/python/paddle/distributed/ps/utils/ps_program_builder.py +++ b/python/paddle/distributed/ps/utils/ps_program_builder.py @@ -373,8 +373,8 @@ class FlPsProgramBuilder(HeterAsyncPsProgramBuilder): _main_file = ps_log_root_dir + '4_fl_worker_main_program.prototxt' #debug_program(_main_file, self.cloned_main) - fake_init_ops_pass = new_pass("fake_init_ops_pass", self.attrs) - fake_init_ops_pass.apply([None], [self.cloned_startup], self.pass_ctx) + #fake_init_ops_pass = new_pass("fake_init_ops_pass", self.attrs) + #fake_init_ops_pass.apply([None], [self.cloned_startup], self.pass_ctx) _main_file = ps_log_root_dir + '5_fl_worker_main_program.prototxt' #debug_program(_main_file, self.cloned_main) -- GitLab