From a2a97cbbac10a050e6ad13999926867e1a4aaafe Mon Sep 17 00:00:00 2001 From: wangzhen38 <41941775+wangzhen38@users.noreply.github.com> Date: Wed, 16 Nov 2022 15:48:33 +0800 Subject: [PATCH] [remove fluid] under fleet meta_optimizers (#47864) * [remove fluid] under fleet meta_optimizers * [remove fluid] under fleet meta_optimizers * [remove fluid] under fleet meta_optimizers * [remove fluid] under fleet meta_optimizers * [remove fluid] under fleet meta_optimizers * [remove fluid] under fleet meta_optimizers * [remove fluid] under fleet meta_optimizers * [remove fluid] under fleet meta_optimizers * [remove fluid] under fleet meta_optimizers * [remove fluid] under fleet meta_optimizers * [remove fluid] under fleet meta_optimizers * [remove fluid] under fleet meta_optimizers --- python/paddle/distributed/__init__.py | 3 +- .../ascend/ascend_optimizer.py | 4 +- .../meta_optimizers/ascend/ascend_parser.py | 2 +- .../heter_parallel_optimizer.py | 8 +- .../hybrid_parallel_gradscaler.py | 4 +- .../hybrid_parallel_optimizer.py | 44 +-- .../sharding_optimizer_stage2.py | 2 +- .../meta_optimizers/sharding/fp16_helper.py | 2 +- .../sharding/offload_helper.py | 3 +- .../fleet/meta_optimizers/sharding/utils.py | 9 +- python/paddle/distributed/io.py | 288 ++++++++++++++++++ .../incubate/fleet/collective/__init__.py | 4 +- .../fluid/tests/unittests/dist_save_load.py | 6 +- .../fluid/tests/unittests/test_dist_base.py | 2 +- .../unittests/test_dist_sparse_load_ps0.py | 4 +- .../fluid/tests/unittests/test_load_op.py | 3 +- .../fluid/tests/unittests/test_load_op_xpu.py | 2 +- .../tests/unittests/test_static_save_load.py | 10 +- python/paddle/framework/__init__.py | 1 + 19 files changed, 350 insertions(+), 51 deletions(-) create mode 100644 python/paddle/distributed/io.py diff --git a/python/paddle/distributed/__init__.py b/python/paddle/distributed/__init__.py index 3612d00904..4db153c53b 100644 --- a/python/paddle/distributed/__init__.py +++ b/python/paddle/distributed/__init__.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from . import io from .spawn import spawn # noqa: F401 from .launch.main import launch # noqa: F401 - from .parallel import init_parallel_env # noqa: F401 from .parallel import get_rank # noqa: F401 from .parallel import get_world_size # noqa: F401 @@ -74,6 +74,7 @@ from .sharding import save_group_sharded_model # noqa: F401 from . import rpc __all__ = [ # noqa + "io", "spawn", "launch", "scatter", diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py index b0495e13b2..64c1881223 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.optimizer import Optimizer -import paddle.fluid.core as core +from paddle.optimizer import Optimizer +import paddle.framework.core as core from . import ascend_parser from paddle.distributed import fleet import hccl.manage.api as hccl diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py index 3be5636b25..79f79a8dea 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid.core as core +import paddle.framework.core as core import numpy as np from functools import reduce diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/heter_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/heter_parallel_optimizer.py index a2a65d995a..44655876e1 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/heter_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/heter_parallel_optimizer.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.dygraph import base as imperative_base -from paddle.fluid import framework +import paddle.autograd as imperative_base +from paddle import framework __all__ = [] @@ -41,13 +41,13 @@ class HeterParallelOptimizer: # NOTE(liubo48): In pure DataParallel mode, # the gradient synchronization is achieved through reducer. - @imperative_base.no_grad + @imperative_base.no_grad() @framework.dygraph_only def step(self): parameters_list = _obtain_optimizer_parameters_list(self._inner_opt) self._inner_opt.step() - @imperative_base.no_grad + @imperative_base.no_grad() def minimize( self, loss, startup_program=None, parameters=None, no_grad_set=None ): diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py index d768411dea..6d723a3af7 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py @@ -13,7 +13,7 @@ # limitations under the License. from ...base.topology import ParallelMode -from paddle.fluid.dygraph import base as imperative_base +import paddle.autograd as imperative_base import paddle from paddle import _legacy_C_ops @@ -51,7 +51,7 @@ class HybridParallelGradScaler: return optimize_ops, params_grads - @imperative_base.no_grad + @imperative_base.no_grad() def _unscale(self, optimizer): if not self._enable: return diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py index bd05cbe879..38c9b7b2bf 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py @@ -19,10 +19,10 @@ from ...utils.hybrid_parallel_util import ( sharding_reduce_gradients, ) from ...base.topology import ParallelMode -from paddle.fluid.dygraph import base as imperative_base -from paddle.fluid import framework +from paddle.autograd import no_grad +from paddle import framework from ...utils.log_util import logger -from paddle.fluid import core +from paddle.framework import core from paddle.fluid import layers __all__ = [] @@ -47,7 +47,7 @@ class HybridParallelClipGrad: self._clip = clip self._hcg = hcg - @imperative_base.no_grad + @no_grad() def _dygraph_clip(self, params_grads): sum_square_dist_fp16 = [] sum_square_dist_fp32 = [] @@ -63,8 +63,8 @@ class HybridParallelClipGrad: if g.type == core.VarDesc.VarType.SELECTED_ROWS: merge_grad = layers.merge_selected_rows(g) merge_grad = layers.get_tensor_from_selected_rows(merge_grad) - square = layers.square(merge_grad) - sum_square = layers.reduce_sum(square) + square = paddle.square(merge_grad) + sum_square = paddle.sum(square) not_shared_enable = (not hasattr(p, 'is_firstly_shared')) or ( hasattr(p, 'is_firstly_shared') @@ -89,8 +89,8 @@ class HybridParallelClipGrad: [0.0], dtype=paddle.float32 ) else: - global_norm_dist_fp16 = layers.concat(sum_square_dist_fp16) - global_norm_dist_fp16 = layers.reduce_sum(global_norm_dist_fp16) + global_norm_dist_fp16 = paddle.concat(sum_square_dist_fp16) + global_norm_dist_fp16 = paddle.sum(global_norm_dist_fp16) global_norm_dist_fp16 = paddle.cast( global_norm_dist_fp16, dtype=paddle.float32 ) @@ -101,29 +101,27 @@ class HybridParallelClipGrad: [0.0], dtype=paddle.float32 ) else: - global_norm_not_dist_fp16 = layers.concat(sum_square_not_dist_fp16) - global_norm_not_dist_fp16 = layers.reduce_sum( - global_norm_not_dist_fp16 - ) + global_norm_not_dist_fp16 = paddle.concat(sum_square_not_dist_fp16) + global_norm_not_dist_fp16 = paddle.sum(global_norm_not_dist_fp16) global_norm_not_dist_fp16 = paddle.cast( global_norm_not_dist_fp16, dtype=paddle.float32 ) # global norm of distributed FP32 params_and_grads global_norm_dist_fp32 = ( - layers.concat(sum_square_dist_fp32) + paddle.concat(sum_square_dist_fp32) if len(sum_square_dist_fp32) != 0 else paddle.to_tensor([0.0], dtype=paddle.float32) ) - global_norm_dist_fp32 = layers.reduce_sum(global_norm_dist_fp32) + global_norm_dist_fp32 = paddle.sum(global_norm_dist_fp32) # global norm of non-distributed FP32 params_and_grads global_norm_not_dist_fp32 = ( - layers.concat(sum_square_not_dist_fp32) + paddle.concat(sum_square_not_dist_fp32) if len(sum_square_not_dist_fp32) != 0 else paddle.to_tensor([0.0], dtype=paddle.float32) ) - global_norm_not_dist_fp32 = layers.reduce_sum(global_norm_not_dist_fp32) + global_norm_not_dist_fp32 = paddle.sum(global_norm_not_dist_fp32) global_norm_var_dist = global_norm_dist_fp16 + global_norm_dist_fp32 global_norm_var_not_dist = ( @@ -151,14 +149,16 @@ class HybridParallelClipGrad: group=self._hcg.get_sharding_parallel_group(), ) - global_norm_var_fp32 = layers.sqrt( + global_norm_var_fp32 = paddle.sqrt( global_norm_var_dist + global_norm_var_not_dist ) - max_global_norm = layers.fill_constant( - shape=[1], dtype=global_norm_var_fp32.dtype, value=self.clip_norm + max_global_norm = paddle.full( + shape=[1], + dtype=global_norm_var_fp32.dtype, + fill_value=self.clip_norm, ) - clip_var = layers.elementwise_div( + clip_var = paddle.divide( x=max_global_norm, y=paddle.maximum(x=global_norm_var_fp32, y=max_global_norm), ) @@ -229,7 +229,7 @@ class HybridParallelOptimizer: self._inner_opt._grad_clip, hcg ) - @imperative_base.no_grad + @no_grad() @framework.dygraph_only def step(self): parameters_list = _obtain_optimizer_parameters_list(self._inner_opt) @@ -241,7 +241,7 @@ class HybridParallelOptimizer: self._inner_opt.step() - @imperative_base.no_grad + @no_grad() def minimize( self, loss, startup_program=None, parameters=None, no_grad_set=None ): diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py index c2bacc6a66..615980ab52 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py @@ -28,7 +28,7 @@ from collections import OrderedDict import paddle import paddle.distributed as dist -from paddle.fluid import core +from paddle.framework import core from paddle.optimizer import Optimizer from paddle.fluid.clip import ClipGradByGlobalNorm from paddle.distributed.collective import ( diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py index 1c500ea56b..f1244c30df 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py @@ -18,7 +18,7 @@ from paddle.distributed.fleet.meta_optimizers.common import ( OpRole, ) -from paddle.fluid import core +from paddle.framework import core __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py index c1951299c2..058b2adc8e 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py @@ -13,7 +13,8 @@ # limitations under the License. from ..common import is_optimizer_op, OP_ROLE_KEY, OpRole, is_update_op -from paddle.fluid import core, unique_name +from paddle.framework import core +from paddle.utils import unique_name __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py index 9feed7b1e5..e5f794e51a 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. import paddle -from paddle.fluid import core, unique_name +from paddle.framework import core +from paddle.utils import unique_name from functools import reduce from paddle.distributed.fleet.meta_optimizers.common import ( is_loss_grad_op, @@ -1046,11 +1047,11 @@ def save_persistables(exe, dirname, main_program, filename=None): ) if int(os.environ.get('PADDLE_TRAINER_ID', 0)) == 0: - paddle.fluid.io.save_persistables( - exe, dirname, main_program=main_program, filename=None + paddle.distributed.io.save_persistables( + exe, dirname, main_program=main_program, filename=filename ) else: - paddle.fluid.io.save_vars( + paddle.static.save_vars( exe, dirname, main_program=main_program, diff --git a/python/paddle/distributed/io.py b/python/paddle/distributed/io.py new file mode 100644 index 0000000000..47c3368c5f --- /dev/null +++ b/python/paddle/distributed/io.py @@ -0,0 +1,288 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import paddle +from paddle.framework import dygraph_not_support, core +from paddle.fluid.framework import Program + + +def _save_distributed_persistables(executor, dirname, main_program): + """ + save_persistables for distributed training. + the method will do things listed below: + 1.save part of persistable variables on trainer. + 2.receive "remote prefetch variables" from parameter servers and merge them. + 3.save "distributed lookup table" on parameter servers. + 4.receive "optimizer variables" from parameter servers and merge them. + + Args: + executor(Executor): The executor to run for saving parameters. + dirname(str): The saving directory path. + main_program(Program): The program whose parameters will be + saved. the main_program must be the trainer_program + get after transpiler. + + Returns: + None + + Examples: + .. code-block:: python + + import paddle + import paddle + + paddle.enable_static() + exe = paddle.static.Executor(paddle.CPUPlace()) + param_path = "./my_paddle_model" + t = distribute_transpiler.DistributeTranspiler() + t.transpile(...) + train_program = t.get_trainer_program() + _save_distributed_persistables(executor=exe, dirname=param_path, main_program=train_program) + """ + + def __save_remote_params(executor, dirname, remote_params_map): + """ + receive params on pserver through rpc. + if the params are be sliced, will concat them to one, then save it. + """ + if not remote_params_map: + return + + prog = paddle.static.Program() + block = prog.global_block() + + # recv optimize vars from pserver + for name, remote_params in remote_params_map.items(): + origin = remote_params[0].origin + is_slice = remote_params[0].is_slice + + slices = [None] * len(remote_params) + slice_varnames = [None] * len(remote_params) + remote_varnames = [None] * len(remote_params) + endpoints = [None] * len(remote_params) + + for idx, optimizer in enumerate(remote_params): + block_id = optimizer.block_id + slice = optimizer.slice + endpoint = optimizer.endpoint + + index = block_id if is_slice else idx + slices[index] = slice + slice_varnames[index] = "{}.slice.{}".format(slice.name, idx) + remote_varnames[index] = slice.name + endpoints[index] = endpoint + + slice_shapes = [] + for slice in slices: + tmp = [str(dim) for dim in slice.shape] + slice_shapes.append(",".join(tmp)) + + block.append_op( + type='recv_save', + attrs={ + "trainer_id": 0, + "shape": origin.shape, + "slice_shapes": slice_shapes, + "slice_varnames": slice_varnames, + "remote_varnames": remote_varnames, + "endpoints": endpoints, + "file_path": os.path.join(dirname, origin.name), + }, + ) + + executor.run(prog) + + def __save_distributed_lookup_tables( + executor, dirname, distributed_lookup_table, endpoints + ): + """ + because the distributed lookup table may too huge to merge and save at one place, + it will be saved at parameter server independent respectively. + + the save directory is dirname/"__lookup_table__". + + """ + prog = paddle.static.Program() + block = prog.global_block() + + # if there is lookup table, the trainer 0 will notify all pserver to save. + lookup_table_filename = os.path.join(dirname, "__lookup_table__") + attrs = {} + attrs['epmap'] = endpoints + attrs['dir'] = lookup_table_filename + attrs['lookup_table'] = distributed_lookup_table + block.append_op( + type='checkpoint_notify', inputs={}, outputs={}, attrs=attrs + ) + executor.run(prog) + + def __exclude_vars(exclude_var_names=[]): + def is_valid(var): + if var.name in exclude_var_names: + return False + if ( + var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH + or var.desc.type() == core.VarDesc.VarType.FETCH_LIST + or var.desc.type() == core.VarDesc.VarType.READER + ): + return False + return var.persistable + + return is_valid + + if not isinstance(main_program, Program): + raise TypeError("'main_program' should be an instance of Program.") + + if not main_program._is_distributed: + raise ValueError( + "'_save_distributed_persistables' just be designed for distributed training." + ) + + remote_params_map = ( + main_program._parameters_on_pservers.get_distributed_vars_by_vtypes( + ["Optimizer", "RemotePrefetch"], groupby=True + ) + ) + + exclude_var_names = [] + if remote_params_map: + exclude_var_names.extend(remote_params_map.keys()) + + if main_program._distributed_lookup_table: + if isinstance(main_program._distributed_lookup_table, list): + exclude_var_names.extend(main_program._distributed_lookup_table) + else: + exclude_var_names.append(main_program._distributed_lookup_table) + + local_vars = list( + filter(__exclude_vars(exclude_var_names), main_program.list_vars()) + ) + paddle.static.save_vars( + executor, main_program=main_program, dirname=dirname, vars=local_vars + ) + + if main_program._is_chief: + if remote_params_map: + __save_remote_params(executor, dirname, remote_params_map) + if main_program._distributed_lookup_table: + __save_distributed_lookup_tables( + executor, + dirname, + main_program._distributed_lookup_table, + main_program._endpoints, + ) + + +def is_persistable(var): + """ + Check whether the given variable is persistable. + + Args: + var(Variable): The variable to be checked. + + Returns: + bool: True if the given `var` is persistable + False if not. + + Examples: + .. code-block:: python + + import paddle + import paddle.fluid as fluid + + paddle.enable_static() + param = fluid.default_main_program().global_block().var('fc.b') + res = fluid.io.is_persistable(param) + """ + if ( + var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH + or var.desc.type() == core.VarDesc.VarType.FETCH_LIST + or var.desc.type() == core.VarDesc.VarType.READER + ): + return False + return var.persistable + + +@dygraph_not_support +def save_persistables(executor, dirname, main_program=None, filename=None): + """ + Save all persistable variables from :code:`main_program` to + the folder :code:`dirname` or file :code:`filename`. You can refer to + :ref:`api_guide_model_save_reader_en` for more details. And then + saves these persistables variables to the folder :code:`dirname` or file + :code:`filename`. + + The :code:`dirname` is used to specify the folder where persistable variables + are going to be saved. If you would like to save variables in separate + files, set :code:`filename` None; if you would like to save all variables in a + single file, use :code:`filename` to specify the file name. + + Args: + executor(Executor): The executor to run for saving persistable variables. + You can refer to :ref:`api_guide_executor_en` for + more details. + + dirname(str, optional): The saving directory path. + When you need to save the parameter to the memory, set it to None. + main_program(Program, optional): The program whose persistbale variables will + be saved. You can refer to + :ref:`api_guide_Program_en` for more details. + If it is None, the default main program will + be used. + Default: None. + filename(str, optional): The file to save all variables. If you prefer to + save variables in different files, set it to None. + Default: None. + + Returns: + str: When saving parameters to a file, returns None. + When saving parameters to memory, returns a binary string containing parameters. + + Examples: + .. code-block:: python + + import paddle + + paddle.enable_static() + dir_path = "./my_paddle_model" + file_name = "persistables" + image = paddle.static..data(name='img', shape=[None, 28, 28], dtype='float32') + label = paddle.static.data(name='label', shape=[None, 1], dtype='int64') + feeder = paddle.static.DataFeeder(feed_list=[image, label], place=paddle.CPUPlace()) + + predict = paddle.static.nn.fc(x=image, size=10, activation='softmax') + loss = paddle.nn.functional.cross_entropy(input=predict, label=label) + avg_loss = paddle.mean(loss) + exe = paddle.static.Executor(paddle.CPUPlace()) + exe.run(paddle.static.default_startup_program()) + paddle.distributed.io.save_persistables(executor=exe, dirname=dir_path, filename=file_name) + # The persistables variables weights and bias in the fc layer of the network + # are going to be saved in the same file named "persistables" in the path + # "./my_paddle_model" + """ + if main_program and main_program._is_distributed: + return _save_distributed_persistables( + executor, dirname=dirname, main_program=main_program + ) + else: + return paddle.static.save_vars( + executor, + dirname=dirname, + main_program=main_program, + vars=None, + predicate=is_persistable, + filename=filename, + ) diff --git a/python/paddle/fluid/incubate/fleet/collective/__init__.py b/python/paddle/fluid/incubate/fleet/collective/__init__.py index 949ef93a47..c18e77b29c 100644 --- a/python/paddle/fluid/incubate/fleet/collective/__init__.py +++ b/python/paddle/fluid/incubate/fleet/collective/__init__.py @@ -154,7 +154,9 @@ class Collective(Fleet): "must be as Program type." ) - io.save_persistables(executor, dirname, main_program, filename=filename) + paddle.distributed.io.save_persistables( + executor, dirname, main_program, filename=filename + ) def save_checkpoint( self, diff --git a/python/paddle/fluid/tests/unittests/dist_save_load.py b/python/paddle/fluid/tests/unittests/dist_save_load.py index eb36010ea6..7f2d864a0f 100644 --- a/python/paddle/fluid/tests/unittests/dist_save_load.py +++ b/python/paddle/fluid/tests/unittests/dist_save_load.py @@ -178,7 +178,9 @@ class TestDistSaveLoad2x2(TestDistSimnetBow2x2): fetch_list=[avg_cost.name], feed=feeder.feed(get_data()) ) if need_save and model_dir: - io.save_persistables(startup_exe, model_dir, trainer_prog) + paddle.distributed.io.save_persistables( + startup_exe, model_dir, trainer_prog + ) var = np.array( fluid.global_scope().find_var('__fc_b__').get_tensor() @@ -199,7 +201,7 @@ class TestDistSaveLoad2x2(TestDistSimnetBow2x2): and idx == skip_steps and args.trainer_id == 0 ): - io.save_persistables( + paddle.distributed.io.save_persistables( startup_exe, model_dir, trainer_prog ) else: diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py index 5b874f1a17..c0f992c010 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -381,7 +381,7 @@ class TestDistRunnerBase: infer_save_dir_fleet = os.path.join( model_save_dir, "fleet_infer_2" ) - fluid.io.save_persistables( + paddle.distributed.io.save_persistables( exe, model_save_dir_fluid, fleet._origin_program ) fleet.save_persistables(executor=exe, dirname=model_save_dir_fleet) diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py index bff5754df1..f79afcca3d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py @@ -68,7 +68,9 @@ class SparseLoadOp(unittest.TestCase): exe = fluid.Executor(fluid.CPUPlace()) exe.run(startup_program) model_path = tempfile.mkdtemp() - fluid.io.save_persistables(executor=exe, dirname=model_path) + paddle.distributed.io.save_persistables( + executor=exe, dirname=model_path + ) return model_path diff --git a/python/paddle/fluid/tests/unittests/test_load_op.py b/python/paddle/fluid/tests/unittests/test_load_op.py index ed123e06a0..a299e6aaff 100644 --- a/python/paddle/fluid/tests/unittests/test_load_op.py +++ b/python/paddle/fluid/tests/unittests/test_load_op.py @@ -18,6 +18,7 @@ import paddle.fluid as fluid import paddle.fluid.layers as layers import os import tempfile +import paddle class TestLoadOp(unittest.TestCase): @@ -42,7 +43,7 @@ class TestLoadOp(unittest.TestCase): ) exe = fluid.Executor(fluid.CPUPlace()) exe.run(start_prog) - fluid.io.save_persistables( + paddle.distributed.io.save_persistables( exe, dirname=os.path.join(self.temp_dir.name, "./model"), main_program=main_prog, diff --git a/python/paddle/fluid/tests/unittests/test_load_op_xpu.py b/python/paddle/fluid/tests/unittests/test_load_op_xpu.py index 05ad3dc776..21e4636ce5 100644 --- a/python/paddle/fluid/tests/unittests/test_load_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/test_load_op_xpu.py @@ -47,7 +47,7 @@ class TestLoadOpXpu(unittest.TestCase): ) exe = fluid.Executor(fluid.XPUPlace(0)) exe.run(start_prog) - fluid.io.save_persistables( + paddle.distributed.io.save_persistables( exe, dirname=self.model_path, main_program=main_prog ) diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index cfdd62c92d..e177e351a6 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -1061,7 +1061,7 @@ class TestLoadFromOldInterface(unittest.TestCase): base_map[var.name] = t # fluid.save(main_program, "./test_1") - fluid.io.save_persistables( + paddle.distributed.io.save_persistables( exe, os.path.join(self.temp_dir.name, "test_path"), main_program ) @@ -1200,7 +1200,7 @@ class TestLoadFromOldInterface(unittest.TestCase): base_map[var.name] = t # fluid.save(main_program, "./test_1") - fluid.io.save_persistables( + paddle.distributed.io.save_persistables( exe, os.path.join(self.temp_dir.name, "test_static_load_var_list"), main_program, @@ -1338,7 +1338,7 @@ class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): base_map[var.name] = t save_dir = os.path.join(temp_dir.name, "test_path") # fluid.save(main_program, "./test_1") - fluid.io.save_persistables( + paddle.distributed.io.save_persistables( exe, save_dir, main_program, filename="model_single" ) @@ -1538,7 +1538,7 @@ class TestProgramStateOldSave(unittest.TestCase): self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t save_dir = os.path.join(self.temp_dir.name, "test_program_1") - fluid.io.save_persistables(exe, save_dir, main_program) + paddle.distributed.io.save_persistables(exe, save_dir, main_program) # set var to zero for var in main_program.list_vars(): @@ -1711,7 +1711,7 @@ class TestProgramStateOldSaveSingleModel(unittest.TestCase): base_map[var.name] = t save_dir = os.path.join(temp_dir.name, "test_program_2") - fluid.io.save_persistables( + paddle.distributed.io.save_persistables( exe, save_dir, main_program, filename="model_1" ) diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index 07e9d34e9b..6725ed1443 100644 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -55,6 +55,7 @@ from ..fluid.framework import ( _get_paddle_place, ) # noqa: F401 from ..fluid.framework import dygraph_only # noqa: F401 +from ..fluid.framework import dygraph_not_support # noqa: F401 from ..fluid.framework import ( convert_np_dtype_to_dtype_, _varbase_creator, -- GitLab