From c70fe47c14c316422547d6bd37b7bc3d08decf6d Mon Sep 17 00:00:00 2001 From: Yulong Ao Date: Tue, 10 Jan 2023 15:54:33 +0800 Subject: [PATCH] [Auto Parallel] Remove some deprecated fluid APIs (#49099) * [Auto Parallel] Remove some fluid APIs * [Auto Parallel] Fix the wrong import * [Auto Parallel] Remove unnecessary comments * [Auto Parallel] Fix the importing bug --- .../distributed/auto_parallel/cluster_v2.py | 4 +-- .../distributed/auto_parallel/completion.py | 2 +- .../auto_parallel/cost/tensor_cost.py | 2 +- .../distributed/auto_parallel/cost_model.py | 2 +- .../distributed/auto_parallel/dist_context.py | 5 ++-- .../distributed/auto_parallel/dist_op.py | 4 +-- .../distributed/auto_parallel/dist_saver.py | 5 ++-- .../distributed/auto_parallel/dist_tensor.py | 3 ++- .../distributed/auto_parallel/engine.py | 21 +++++++-------- .../distributed/auto_parallel/helper.py | 3 +-- .../distributed/auto_parallel/interface.py | 2 +- .../dist_check_finite_and_unscale.py | 2 +- .../auto_parallel/operators/dist_embedding.py | 3 ++- .../auto_parallel/operators/dist_matmul.py | 3 ++- .../auto_parallel/operators/dist_pnorm.py | 4 +-- .../distributed/auto_parallel/parallelizer.py | 5 ++-- .../auto_parallel/parallelizer_v2.py | 5 ++-- .../distributed/auto_parallel/partitioner.py | 18 ++++++------- .../auto_parallel/process_group.py | 3 +-- .../distributed/auto_parallel/process_mesh.py | 4 +-- .../auto_parallel/process_mesh_v2.py | 2 +- .../distributed/auto_parallel/reshard.py | 20 +++++++------- .../auto_parallel/tuner/optimization_tuner.py | 4 +-- .../auto_parallel/tuner/profiler.py | 3 ++- .../paddle/distributed/auto_parallel/utils.py | 16 +++++------ .../distributed/passes/auto_parallel_amp.py | 6 ++--- ...uto_parallel_data_parallel_optimization.py | 4 +-- .../distributed/passes/auto_parallel_fp16.py | 11 ++++---- .../passes/auto_parallel_gradient_merge.py | 2 +- .../passes/auto_parallel_quantization.py | 6 ++--- .../passes/auto_parallel_recompute.py | 7 +++-- .../passes/auto_parallel_sharding.py | 4 +-- .../auto_parallel/test_cluster_v2.py | 27 ++++++++++--------- python/paddle/framework/__init__.py | 1 + python/paddle/static/__init__.py | 4 +++ python/paddle/utils/unique_name.py | 1 + 36 files changed, 110 insertions(+), 108 deletions(-) diff --git a/python/paddle/distributed/auto_parallel/cluster_v2.py b/python/paddle/distributed/auto_parallel/cluster_v2.py index 951114ff38e..06cc8d0e470 100644 --- a/python/paddle/distributed/auto_parallel/cluster_v2.py +++ b/python/paddle/distributed/auto_parallel/cluster_v2.py @@ -16,9 +16,7 @@ from enum import IntEnum, unique import numpy as np -from paddle.fluid import core -from paddle.fluid.core import Device # noqa: F401 -from paddle.fluid.core import Link # noqa: F401 +from paddle.framework import core @unique diff --git a/python/paddle/distributed/auto_parallel/completion.py b/python/paddle/distributed/auto_parallel/completion.py index 403a62734e9..123ff0e0206 100644 --- a/python/paddle/distributed/auto_parallel/completion.py +++ b/python/paddle/distributed/auto_parallel/completion.py @@ -16,7 +16,7 @@ import copy import logging from paddle.distributed.fleet.meta_optimizers.common import OpRole -from paddle.fluid import core +from paddle.framework import core from .dist_attribute import OperatorDistAttr, TensorDistAttr from .dist_context import _node_id diff --git a/python/paddle/distributed/auto_parallel/cost/tensor_cost.py b/python/paddle/distributed/auto_parallel/cost/tensor_cost.py index 03140ae3dc3..34b7b20a7e2 100644 --- 
a/python/paddle/distributed/auto_parallel/cost/tensor_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/tensor_cost.py @@ -16,7 +16,7 @@ from functools import reduce import paddle from paddle.distributed.auto_parallel.dist_tensor import DistributedTensor -from paddle.fluid.framework import Variable +from paddle.static import Variable from .base_cost import Cost diff --git a/python/paddle/distributed/auto_parallel/cost_model.py b/python/paddle/distributed/auto_parallel/cost_model.py index 5eeb1e41cd4..2ad7ec3a032 100644 --- a/python/paddle/distributed/auto_parallel/cost_model.py +++ b/python/paddle/distributed/auto_parallel/cost_model.py @@ -20,7 +20,7 @@ import numpy as np import paddle from paddle.distributed.fleet.meta_optimizers.common import OpRole -from paddle.fluid import core +from paddle.framework import core SUCC = 0 # successor PRED = 1 # predecessor diff --git a/python/paddle/distributed/auto_parallel/dist_context.py b/python/paddle/distributed/auto_parallel/dist_context.py index d0a1a5c228a..8ee92eb958a 100644 --- a/python/paddle/distributed/auto_parallel/dist_context.py +++ b/python/paddle/distributed/auto_parallel/dist_context.py @@ -16,8 +16,7 @@ import copy from collections import defaultdict from paddle.distributed.passes import PassContext -from paddle.fluid import core, framework -from paddle.fluid.framework import set_flags +from paddle.framework import IrGraph, core, set_flags from .dist_op import DistributedOperator from .dist_tensor import DistributedTensor @@ -437,7 +436,7 @@ class DistributedContext: if with_graph: set_flags({"FLAGS_convert_all_blocks": True}) - self._serial_graph = framework.IrGraph( + self._serial_graph = IrGraph( core.Graph(self._serial_main_program.desc) ) self._init_dist_attr_for_graph() diff --git a/python/paddle/distributed/auto_parallel/dist_op.py b/python/paddle/distributed/auto_parallel/dist_op.py index 6c57c563391..ac39d62a30d 100644 --- a/python/paddle/distributed/auto_parallel/dist_op.py +++ b/python/paddle/distributed/auto_parallel/dist_op.py @@ -15,7 +15,7 @@ import copy import paddle -from paddle.fluid.framework import Variable +from paddle.static import Variable from .dist_attribute import OperatorDistAttr from .utils import ( @@ -303,7 +303,7 @@ class DistributedOperatorHelper: tensor_to_dims_mapping[arg.name] = self._in_dims_mappings[index] index += 1 - default_prog = paddle.fluid.default_main_program() + default_prog = paddle.static.default_main_program() cur_block = default_prog.current_block() op_size = len(cur_block.ops) output = self._serial_op(*args, **kwargs) diff --git a/python/paddle/distributed/auto_parallel/dist_saver.py b/python/paddle/distributed/auto_parallel/dist_saver.py index 4bd69444f6a..6d263604628 100644 --- a/python/paddle/distributed/auto_parallel/dist_saver.py +++ b/python/paddle/distributed/auto_parallel/dist_saver.py @@ -21,8 +21,7 @@ import re import numpy as np import paddle -from paddle import fluid -from paddle.fluid import core +from paddle.framework import core from ..utils.log_utils import get_logger from .process_group import _g_process_group_map @@ -167,7 +166,7 @@ class DistributedSaver: dist_main_prog = kwargs.get('program', None) if not dist_main_prog: - dist_main_prog = fluid.default_main_program() + dist_main_prog = paddle.static.default_main_program() global_block = dist_main_prog.global_block() ops = global_block.ops diff --git a/python/paddle/distributed/auto_parallel/dist_tensor.py b/python/paddle/distributed/auto_parallel/dist_tensor.py index 0a5f5d604d1..dbe9cc348cf 100644 --- 
a/python/paddle/distributed/auto_parallel/dist_tensor.py +++ b/python/paddle/distributed/auto_parallel/dist_tensor.py @@ -16,7 +16,8 @@ import copy import inspect import paddle -from paddle.fluid.framework import Block, Parameter, Variable +from paddle.framework import Block +from paddle.static import Parameter, Variable from .dist_attribute import TensorDistAttr from .utils import __no_shape_var_type__, _linear_idx2coordinate diff --git a/python/paddle/distributed/auto_parallel/engine.py b/python/paddle/distributed/auto_parallel/engine.py index d8febaf9d51..ead912764f0 100644 --- a/python/paddle/distributed/auto_parallel/engine.py +++ b/python/paddle/distributed/auto_parallel/engine.py @@ -24,17 +24,16 @@ import numpy as np import paddle import paddle.distributed.auto_parallel.utils as auto_utils import paddle.utils as utils -from paddle import fluid, static +from paddle import static from paddle.distributed import fleet -from paddle.fluid import Variable, core from paddle.fluid.dygraph.parallel import ParallelEnv -from paddle.fluid.executor import _to_name_str, global_scope -from paddle.fluid.framework import IrGraph, Operator -from paddle.fluid.framework import _current_expected_place as _get_device -from paddle.fluid.framework import in_dygraph_mode +from paddle.fluid.executor import _to_name_str from paddle.fluid.layers.utils import flatten +from paddle.framework import IrGraph +from paddle.framework import _current_expected_place as _get_device +from paddle.framework import core, in_dygraph_mode from paddle.metric import Metric -from paddle.static import InputSpec +from paddle.static import InputSpec, Operator, Variable, global_scope from ..utils.log_utils import get_logger from .callbacks import config_callbacks @@ -151,11 +150,11 @@ class Engine: if optimizer and not isinstance( optimizer, - (paddle.optimizer.Optimizer, paddle.fluid.optimizer.Optimizer), + (paddle.optimizer.Optimizer, paddle.static.Optimizer), ): raise TypeError( "'optimizer' must be object of class `paddle.optimizer.Optimizer`" - " or `paddle.fluid.optimizer.Optimizer`." + " or `paddle.static.Optimizer`." 
) self._optimizer = auto_utils.validate_opt(optimizer) self._orig_optimizer = copy.deepcopy(self._optimizer) @@ -769,8 +768,8 @@ class Engine: process_group.instantiate() self._place = _get_device() - if isinstance(self._place, fluid.CUDAPlace): - self._place = fluid.CUDAPlace(ParallelEnv().dev_id) + if isinstance(self._place, paddle.framework.CUDAPlace): + self._place = paddle.framework.CUDAPlace(ParallelEnv().dev_id) if self._strategy.seed: paddle.seed(self._strategy.seed + self._dp_ranks[0]) diff --git a/python/paddle/distributed/auto_parallel/helper.py b/python/paddle/distributed/auto_parallel/helper.py index 68741eb1211..164f51fbcc7 100644 --- a/python/paddle/distributed/auto_parallel/helper.py +++ b/python/paddle/distributed/auto_parallel/helper.py @@ -15,11 +15,10 @@ import logging from collections import defaultdict -from paddle.fluid.executor import global_scope -from paddle.fluid.framework import Parameter, program_guard from paddle.jit import not_to_static, to_static from paddle.jit.dy2static.program_translator import StaticFunction from paddle.nn import Layer +from paddle.static import Parameter, global_scope, program_guard from .converter import Converter from .utils import get_logger, to_list diff --git a/python/paddle/distributed/auto_parallel/interface.py b/python/paddle/distributed/auto_parallel/interface.py index 98316100a8d..1d19fda1b83 100644 --- a/python/paddle/distributed/auto_parallel/interface.py +++ b/python/paddle/distributed/auto_parallel/interface.py @@ -207,7 +207,7 @@ def recompute(op): self._op = op def __call__(self, *args, **kwargs): - default_prog = paddle.fluid.default_main_program() + default_prog = paddle.static.default_main_program() cur_block = default_prog.current_block() op_size = len(cur_block.ops) output = self._op(*args, **kwargs) diff --git a/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py b/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py index feb35717c1d..5fa88e55e94 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py @@ -16,7 +16,7 @@ from paddle.distributed.auto_parallel.process_group import ( get_world_process_group, ) from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole -from paddle.fluid import core +from paddle.framework import core from ..dist_attribute import OperatorDistAttr from ..process_group import new_process_group diff --git a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py b/python/paddle/distributed/auto_parallel/operators/dist_embedding.py index f2714c5b67a..d92dd8f94d3 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_embedding.py @@ -17,8 +17,9 @@ from paddle.distributed.auto_parallel.cost.comm_op_cost import ( IdentityOpCost, ) from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole -from paddle.fluid import core, unique_name from paddle.fluid.data_feeder import check_dtype, check_variable_and_dtype +from paddle.framework import core +from paddle.utils import unique_name from ..cost import ( EmbeddingGradOpCost, diff --git a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py index 8ed07d1a0b2..a89da6fd4e4 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py 
+++ b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py @@ -19,8 +19,9 @@ from paddle.distributed.auto_parallel.cost.comm_op_cost import ( IdentityOpCost, ) from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole -from paddle.fluid import core, unique_name from paddle.fluid.data_feeder import check_dtype, check_variable_and_dtype +from paddle.framework import core +from paddle.utils import unique_name from ..cost import ( MatmulGradOpCost, diff --git a/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py b/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py index 7960fe849ba..228a8961a68 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py @@ -14,9 +14,9 @@ import copy -from paddle.fluid import core from paddle.fluid.data_feeder import check_dtype, check_variable_and_dtype -from paddle.fluid.framework import Operator +from paddle.framework import core +from paddle.static import Operator from ..dist_attribute import OperatorDistAttr, TensorDistAttr from ..process_group import new_process_group diff --git a/python/paddle/distributed/auto_parallel/parallelizer.py b/python/paddle/distributed/auto_parallel/parallelizer.py index 0815ed1cd53..e665d5c43d9 100644 --- a/python/paddle/distributed/auto_parallel/parallelizer.py +++ b/python/paddle/distributed/auto_parallel/parallelizer.py @@ -24,11 +24,10 @@ import sys import time import paddle -import paddle.fluid.core as core from paddle.distributed.passes import PassContext, new_pass from paddle.distributed.utils.log_utils import get_logger -from paddle.fluid import program_guard -from paddle.fluid.backward import append_backward +from paddle.framework import core +from paddle.static import append_backward, program_guard from .cluster import Cluster from .completion import Completer diff --git a/python/paddle/distributed/auto_parallel/parallelizer_v2.py b/python/paddle/distributed/auto_parallel/parallelizer_v2.py index bccda52cfa4..48831f3ff2c 100644 --- a/python/paddle/distributed/auto_parallel/parallelizer_v2.py +++ b/python/paddle/distributed/auto_parallel/parallelizer_v2.py @@ -17,9 +17,8 @@ import logging import time from paddle.distributed.passes import new_pass -from paddle.fluid import program_guard -from paddle.fluid.backward import append_backward -from paddle.fluid.framework import unique_name +from paddle.static import append_backward, program_guard +from paddle.utils import unique_name from ..utils.log_utils import get_logger from .partitioner import Partitioner diff --git a/python/paddle/distributed/auto_parallel/partitioner.py b/python/paddle/distributed/auto_parallel/partitioner.py index 172e68a5f52..03d744a6c04 100644 --- a/python/paddle/distributed/auto_parallel/partitioner.py +++ b/python/paddle/distributed/auto_parallel/partitioner.py @@ -14,13 +14,13 @@ import copy -import paddle.fluid as fluid +import paddle from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed.auto_parallel.operators.common import ( get_distributed_operator_impl_container, ) -from paddle.fluid import core -from paddle.fluid.framework import Parameter, Program +from paddle.framework import Program, core +from paddle.static import Parameter from .dist_attribute import OperatorDistAttr from .operators.common import BACKWARD_ONLY_DIST_OPS @@ -52,12 +52,12 @@ class Partitioner: def __init__(self, dist_context, rank_id=0): """ Args: - dist_context 
(paddle.fluid.DistributedContext): used to access the distributed_attr of var & op, every Partitioner object could maintain its own DistributedContext member, and partition program base on that shard scenario. + dist_context (DistributedContext): used to access the distributed_attr of var & op, every Partitioner object could maintain its own DistributedContext member, and partition program base on that shard scenario. rank_id (int): global rank id to which the partitioned distributed program belong. """ if not isinstance(dist_context, DistributedContext): raise TypeError( - "dist_context be paddle.fluid.DistributedContext, got %s here" + "dist_context be DistributedContext, got %s here" % type(dist_context) ) @@ -71,7 +71,7 @@ class Partitioner: ): if not isinstance(serial_main_program, (Program)): raise TypeError( - "main_program be paddle.fluid.framework.program, got %s here" + "main_program be paddle.framework.Program, got %s here" % type(serial_main_program) ) @@ -113,11 +113,11 @@ class Partitioner: if not isinstance(serial_startup_program, (Program)): raise TypeError( - "dist_context be paddle.fluid.framework.program, got %s here" + "dist_context be paddle.framework.Program, got %s here" % type(serial_startup_program) ) - partitioned_startup_prog = fluid.Program() + partitioned_startup_prog = paddle.framework.Program() ref_block = serial_main_program.global_block() target_block = partitioned_startup_prog.global_block() var2shape = {} @@ -183,7 +183,7 @@ class Partitioner: 2. replace local op with corresponding dist op """ - partitioned_main_prog = fluid.Program() + partitioned_main_prog = paddle.framework.Program() dist_op_context = self._dist_context.dist_op_context dist_op_context.dst_main_program = partitioned_main_prog diff --git a/python/paddle/distributed/auto_parallel/process_group.py b/python/paddle/distributed/auto_parallel/process_group.py index 2a07bbe4bb9..8ad0172ea3d 100644 --- a/python/paddle/distributed/auto_parallel/process_group.py +++ b/python/paddle/distributed/auto_parallel/process_group.py @@ -15,9 +15,8 @@ from collections import OrderedDict import paddle -import paddle.fluid.core as core from paddle import _legacy_C_ops -from paddle.fluid.framework import in_dygraph_mode +from paddle.framework import core, in_dygraph_mode from ...fluid.layers.tensor import fill_constant from ..collective import _get_global_env, _new_ring_id diff --git a/python/paddle/distributed/auto_parallel/process_mesh.py b/python/paddle/distributed/auto_parallel/process_mesh.py index dacc10f101b..a7786cbea0c 100644 --- a/python/paddle/distributed/auto_parallel/process_mesh.py +++ b/python/paddle/distributed/auto_parallel/process_mesh.py @@ -157,7 +157,7 @@ class ProcessMesh(core.ProcessMesh): def __enter__(self): set_current_process_mesh(self) - default_prog = paddle.fluid.default_main_program() + default_prog = paddle.static.default_main_program() cur_block = default_prog.current_block() self._old_var_names = list(cur_block.vars.keys()) self._old_op_size = len(cur_block.ops) @@ -166,7 +166,7 @@ class ProcessMesh(core.ProcessMesh): from .dist_op import DistributedOperator from .dist_tensor import DistributedTensor - default_prog = paddle.fluid.default_main_program() + default_prog = paddle.static.default_main_program() cur_block = default_prog.current_block() new_var_names = list(cur_block.vars.keys()) new_op_size = len(cur_block.ops) diff --git a/python/paddle/distributed/auto_parallel/process_mesh_v2.py b/python/paddle/distributed/auto_parallel/process_mesh_v2.py index 
46f03b2cc60..a9e66e20c86 100644 --- a/python/paddle/distributed/auto_parallel/process_mesh_v2.py +++ b/python/paddle/distributed/auto_parallel/process_mesh_v2.py @@ -14,7 +14,7 @@ import numpy as np -from paddle.fluid import core +from paddle.framework import core class ProcessMesh(core.ProcessMesh): diff --git a/python/paddle/distributed/auto_parallel/reshard.py b/python/paddle/distributed/auto_parallel/reshard.py index bc09aaaf7a7..0733a22345e 100644 --- a/python/paddle/distributed/auto_parallel/reshard.py +++ b/python/paddle/distributed/auto_parallel/reshard.py @@ -15,11 +15,9 @@ from functools import reduce import paddle -import paddle.fluid.core as core import paddle.fluid.layers.utils as utils from paddle.distributed.fleet.meta_optimizers.common import OpRole -from paddle.fluid.framework import OpProtoHolder, Program -from paddle.fluid.layer_helper import LayerHelper +from paddle.framework import LayerHelper, OpProtoHolder, Program, core from paddle.utils import unique_name from .cost import ( @@ -310,7 +308,7 @@ class Inserter: @staticmethod def insert_cast_op(block, idx, tensor, op_role, tensor_type): # to avoid name conflict with framework - new_var_name = paddle.fluid.unique_name.generate_with_ignorable_key( + new_var_name = paddle.utils.unique_name.generate_with_ignorable_key( ".".join(["cast@RESHARD", 'tmp']) ) out = block.create_var( @@ -380,7 +378,7 @@ class Inserter: def insert_reset_lod_op(block, idx, X, Y, op_role): """Insert reset_lod op into block at the given index.""" - new_var_name = paddle.fluid.unique_name.generate_with_ignorable_key( + new_var_name = paddle.utils.unique_name.generate_with_ignorable_key( ".".join(["reset_lod@RESHARD", 'tmp']) ) reset_lod_out = block.create_var( @@ -412,7 +410,7 @@ class Inserter: helper = LayerHelper('concat@RESHARD', **locals()) with paddle.static.program_guard(block.program): out = block.create_var( - name=paddle.fluid.unique_name.generate_with_ignorable_key( + name=paddle.utils.unique_name.generate_with_ignorable_key( ".".join([helper.name, 'tmp']) ), dtype=tensors[0].dtype, @@ -484,7 +482,7 @@ class Inserter: with paddle.static.program_guard(block.program): outs = [ block.create_var( - name=paddle.fluid.unique_name.generate_with_ignorable_key( + name=paddle.utils.unique_name.generate_with_ignorable_key( ".".join(['split@RESHARD', 'tmp']) ), dtype=tensor.dtype, @@ -550,7 +548,7 @@ class Inserter: with paddle.static.program_guard(block.program): outs = [ block.create_var( - name=paddle.fluid.unique_name.generate_with_ignorable_key( + name=paddle.utils.unique_name.generate_with_ignorable_key( ".".join([helper.name, 'tmp']) ), dtype=tensor.dtype, @@ -576,7 +574,7 @@ class Inserter: # use paddle.int64 as dtype with paddle.static.program_guard(block.program): out = block.create_var( - name=paddle.fluid.unique_name.generate_with_ignorable_key( + name=paddle.utils.unique_name.generate_with_ignorable_key( ".".join([helper.name, 'tmp']) ), dtype=paddle.int64, @@ -650,7 +648,7 @@ class Inserter: helper = LayerHelper(op_type + "@RESHARD", **locals()) with paddle.static.program_guard(block.program): allgather_out = block.create_var( - name=paddle.fluid.unique_name.generate_with_ignorable_key( + name=paddle.utils.unique_name.generate_with_ignorable_key( ".".join([helper.name, 'tmp']) ), dtype=tensor.dtype, @@ -695,7 +693,7 @@ class Inserter: helper = LayerHelper(op_type + "@RESHARD", **locals()) with paddle.static.program_guard(block.program): c_concat_out = block.create_var( - name=paddle.fluid.unique_name.generate_with_ignorable_key( + 
name=paddle.utils.unique_name.generate_with_ignorable_key( ".".join([helper.name, 'tmp']) ), dtype=tensor.dtype, diff --git a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py index e09f55d91ec..508a9e51b54 100644 --- a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py +++ b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py @@ -41,8 +41,8 @@ from paddle.distributed.auto_parallel.utils import ( set_grad_var_shape, ) from paddle.distributed.passes import PassContext, new_pass -from paddle.fluid import program_guard, unique_name -from paddle.fluid.backward import append_backward +from paddle.static import append_backward, program_guard +from paddle.utils import unique_name from ..utils import get_logger from .algorithms import new_algorithm diff --git a/python/paddle/distributed/auto_parallel/tuner/profiler.py b/python/paddle/distributed/auto_parallel/tuner/profiler.py index cdd4a0045c8..4269a773645 100644 --- a/python/paddle/distributed/auto_parallel/tuner/profiler.py +++ b/python/paddle/distributed/auto_parallel/tuner/profiler.py @@ -28,7 +28,8 @@ from paddle.distributed.auto_parallel.process_group import ( new_process_group, ) from paddle.distributed.collective import _get_global_env -from paddle.fluid.framework import Operator, Program, _current_expected_place +from paddle.framework import Program, _current_expected_place +from paddle.static import Operator paddle.enable_static() diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/utils.py index 9eb2f806eab..5a6c46af0eb 100644 --- a/python/paddle/distributed/auto_parallel/utils.py +++ b/python/paddle/distributed/auto_parallel/utils.py @@ -22,9 +22,9 @@ from functools import reduce import numpy as np import paddle -from paddle.fluid.framework import Variable from paddle.fluid.io import is_belong_to_optimizer, is_parameter from paddle.framework import core +from paddle.static import Variable from .dist_attribute import OperatorDistAttr, TensorDistAttr from .process_group import get_all_process_groups @@ -619,7 +619,7 @@ def save_distributed_checkpoint( """ from .dist_context import get_default_distributed_context - assert isinstance(program, paddle.fluid.framework.Program) + assert isinstance(program, paddle.static.Program) assert isinstance(is_integrated, bool) if dist_context is None: dist_context = get_default_distributed_context() @@ -702,7 +702,7 @@ def load_checkpoint_into_program( """ from .dist_context import get_default_distributed_context - assert isinstance(program, paddle.fluid.framework.Program) + assert isinstance(program, paddle.static.Program) assert _check_valid_path( checkpoint_path ), "'checkpoint_path' cannot be None." 
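The reshard and checkpoint hunks above all apply the same substitution: temporary variables are named through paddle.utils.unique_name and programs are type-checked against paddle.static.Program instead of their paddle.fluid counterparts. A minimal sketch of that pattern, assuming a Paddle build that already contains the re-exports added at the end of this patch (variable names and shapes are illustrative only):

    import paddle
    from paddle.utils import unique_name

    paddle.enable_static()

    main_prog = paddle.static.default_main_program()
    assert isinstance(main_prog, paddle.static.Program)

    block = main_prog.current_block()
    # Collision-resistant temporary name, as used for the "@RESHARD" helper vars above.
    tmp_name = unique_name.generate_with_ignorable_key(".".join(["cast@RESHARD", "tmp"]))
    out = block.create_var(name=tmp_name, dtype="float32", shape=[8, 8])
    print(out.name)
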
@@ -731,7 +731,7 @@ def load_parameter_into_program(param_dict, program): program(Program): the program to be updated """ assert isinstance(param_dict, dict) - assert program and isinstance(program, paddle.fluid.framework.Program) + assert program and isinstance(program, paddle.static.Program) if not param_dict: return program.set_state_dict(param_dict) @@ -818,7 +818,7 @@ def get_dist_attr(program, dist_context=None): """ from .dist_context import get_default_distributed_context - assert isinstance(program, paddle.fluid.framework.Program) + assert isinstance(program, paddle.static.Program) if dist_context is None: dist_context = get_default_distributed_context() dist_attr = {} @@ -1845,7 +1845,7 @@ def get_var_numel(var): def get_lr(optimizer): if isinstance(optimizer, paddle.optimizer.Optimizer): return optimizer.get_lr() - elif isinstance(optimizer, paddle.fluid.optimizer.Optimizer): + elif isinstance(optimizer, paddle.static.Optimizer): if isinstance(optimizer._learning_rate, float): return optimizer._learning_rate else: @@ -1853,9 +1853,7 @@ def get_lr(optimizer): else: raise TypeError( "'optimizer' must be object of class `paddle.optimizer.Optimizer`" - " or `paddle.fluid.optimizer.Optimizer`, but got {}.".format( - type(optimizer) - ) + " or `paddle.static.Optimizer`, but got {}.".format(type(optimizer)) ) diff --git a/python/paddle/distributed/passes/auto_parallel_amp.py b/python/paddle/distributed/passes/auto_parallel_amp.py index 06524a905bd..8f965bdd950 100644 --- a/python/paddle/distributed/passes/auto_parallel_amp.py +++ b/python/paddle/distributed/passes/auto_parallel_amp.py @@ -23,7 +23,6 @@ from paddle.distributed.auto_parallel.utils import ( set_var_dist_attr, ) from paddle.distributed.fleet.meta_optimizers.common import OpRole -from paddle.fluid import unique_name from paddle.fluid.contrib.mixed_precision.fp16_utils import ( AutoMixedPrecisionLists, _dtype_to_str, @@ -38,6 +37,7 @@ from paddle.fluid.contrib.mixed_precision.fp16_utils import ( ) from paddle.fluid.data_feeder import check_type, check_variable_and_dtype from paddle.framework import core +from paddle.utils import unique_name from ..auto_parallel.process_mesh import ProcessMesh from ..auto_parallel.utils import is_backward_op, is_forward_op, is_loss_op @@ -523,7 +523,7 @@ def _update_backward_cast_ops(params_grads, dist_context): # add new op in the python and cpp at the same time new_op_desc = main_block.desc.append_op() new_op_desc.copy_from(op.desc) - new_op = paddle.fluid.framework.Operator( + new_op = paddle.static.Operator( block=main_block, desc=new_op_desc, type=None, @@ -898,7 +898,7 @@ class AMPPass(PassBase): OP_ROLE_KEY, core.op_proto_and_checker_maker.OpRole.Backward ) elementwise_mul_grad_op_desc._set_attr('axis', -1) - elementwise_mul_grad_op = paddle.fluid.framework.Operator( + elementwise_mul_grad_op = paddle.static.Operator( main_block, elementwise_mul_grad_op_desc ) main_block.ops.insert(loss_op_idx + 3, elementwise_mul_grad_op) diff --git a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py index 8be8c67bca6..e9acb9074fe 100644 --- a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py +++ b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py @@ -29,9 +29,9 @@ from paddle.distributed.auto_parallel.utils import ( ring_id_to_process_group, ) from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole -from paddle.fluid 
import unique_name from paddle.fluid.executor import _is_enable_standalone_executor -from paddle.fluid.framework import default_main_program +from paddle.static import default_main_program +from paddle.utils import unique_name from .pass_base import PassBase, PassType, register_pass diff --git a/python/paddle/distributed/passes/auto_parallel_fp16.py b/python/paddle/distributed/passes/auto_parallel_fp16.py index ffe9eb0b4b5..53aba19b98f 100644 --- a/python/paddle/distributed/passes/auto_parallel_fp16.py +++ b/python/paddle/distributed/passes/auto_parallel_fp16.py @@ -26,7 +26,6 @@ from paddle.distributed.auto_parallel.utils import ( set_var_dist_attr, ) from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole -from paddle.fluid import unique_name from paddle.fluid.contrib.mixed_precision.fp16_utils import ( AutoMixedPrecisionLists, _dtype_to_str, @@ -35,8 +34,9 @@ from paddle.fluid.contrib.mixed_precision.fp16_utils import ( _valid_types, ) from paddle.fluid.data_feeder import check_type, check_variable_and_dtype -from paddle.fluid.framework import default_main_program, default_startup_program from paddle.framework import core +from paddle.static import default_main_program, default_startup_program +from paddle.utils import unique_name from ..auto_parallel.process_mesh import ProcessMesh from .auto_parallel_amp import AMPPass @@ -790,7 +790,7 @@ class FP16Pass(AMPPass): # all_infs = paddle.fluid.layers.concat(found_infs) all_infs = block.create_var( - name=paddle.fluid.unique_name.generate_with_ignorable_key( + name=paddle.utils.unique_name.generate_with_ignorable_key( ".".join(['concat', 'tmp']) ), dtype=found_infs[0].dtype, @@ -821,7 +821,7 @@ class FP16Pass(AMPPass): # found_inf = paddle.fluid.layers.reduce_any(all_infs) found_inf = block.create_var( - name=paddle.fluid.unique_name.generate_with_ignorable_key( + name=paddle.utils.unique_name.generate_with_ignorable_key( ".".join(['reduce_any', 'tmp']) ), dtype=all_infs.dtype, @@ -867,7 +867,8 @@ class FP16Pass(AMPPass): if self.get_attr("use_optimizer_fp16"): base_opt._multi_precision = False if isinstance( - base_opt, (paddle.fluid.optimizer.Adam, paddle.optimizer.AdamW) + base_opt, + (paddle.static.Adam, paddle.optimizer.AdamW), ): with main_program._optimized_guard([]): # found_inf = paddle.tensor.creation._memcpy( diff --git a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py index 748aab45e31..f807127f45e 100644 --- a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py +++ b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py @@ -26,8 +26,8 @@ from paddle.distributed.auto_parallel.utils import ( ) from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from paddle.fluid import layers -from paddle.fluid.framework import device_guard from paddle.framework import core +from paddle.static import device_guard from .pass_base import PassBase, PassType, register_pass diff --git a/python/paddle/distributed/passes/auto_parallel_quantization.py b/python/paddle/distributed/passes/auto_parallel_quantization.py index 9019f3b0cc9..ea4357fdcc5 100644 --- a/python/paddle/distributed/passes/auto_parallel_quantization.py +++ b/python/paddle/distributed/passes/auto_parallel_quantization.py @@ -17,8 +17,8 @@ import logging import numpy as np import paddle -from paddle.fluid import core, framework from paddle.fluid.dygraph.parallel import ParallelEnv +from paddle.framework import IrGraph, core from 
paddle.static.quantization import ( AddQuantDequantForInferencePass, AddQuantDequantPassV2, @@ -72,7 +72,7 @@ class QuantizationPass(PassBase): # TODO: scope and place will be removed, # cause params should be initialized by engine module. scope = paddle.static.global_scope() - place = paddle.fluid.CUDAPlace(ParallelEnv().dev_id) + place = paddle.framework.CUDAPlace(ParallelEnv().dev_id) # 0. record the relation among blocks parent_idx_dict = dict() @@ -81,7 +81,7 @@ class QuantizationPass(PassBase): is_test = True if mode != "train" else False # 1. Program convert to Graph, and this pass is only for train mode - main_graph = framework.IrGraph( + main_graph = IrGraph( core.Graph(main_program.desc), for_test=mode != "train" ) diff --git a/python/paddle/distributed/passes/auto_parallel_recompute.py b/python/paddle/distributed/passes/auto_parallel_recompute.py index 40754a8f493..a1fa73d8c33 100644 --- a/python/paddle/distributed/passes/auto_parallel_recompute.py +++ b/python/paddle/distributed/passes/auto_parallel_recompute.py @@ -14,8 +14,8 @@ import logging +import paddle from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole -from paddle.fluid import core, framework, unique_name from paddle.fluid.backward import ( ProgramStats, _append_grad_suffix_, @@ -23,6 +23,8 @@ from paddle.fluid.backward import ( _get_no_grad_set_name, _rename_arg_, ) +from paddle.framework import core +from paddle.utils import unique_name from ..auto_parallel.dist_attribute import OperatorDistAttr from ..auto_parallel.utils import ( @@ -221,7 +223,8 @@ def _add_needed_descs_to_block( result_descs = [] for desc in descs: - if isinstance(desc, framework.Operator): + # if isinstance(desc, framework.Operator): + if isinstance(desc, paddle.static.Operator): desc = desc.desc if isinstance(desc, tuple): desc = desc[0] diff --git a/python/paddle/distributed/passes/auto_parallel_sharding.py b/python/paddle/distributed/passes/auto_parallel_sharding.py index bb3ebbeaf8f..29e6e59c584 100644 --- a/python/paddle/distributed/passes/auto_parallel_sharding.py +++ b/python/paddle/distributed/passes/auto_parallel_sharding.py @@ -35,10 +35,10 @@ from paddle.distributed.auto_parallel.utils import ( set_var_dist_attr, ) from paddle.distributed.fleet.meta_optimizers.sharding.utils import get_var_size -from paddle.fluid import unique_name from paddle.fluid.executor import _is_enable_standalone_executor -from paddle.fluid.framework import default_main_program, default_startup_program from paddle.framework import core +from paddle.static import default_main_program, default_startup_program +from paddle.utils import unique_name from .pass_base import PassBase, register_pass diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_cluster_v2.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_cluster_v2.py index db1febadeaa..3f10fb95b84 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_cluster_v2.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_cluster_v2.py @@ -14,7 +14,8 @@ import unittest -from paddle.distributed.auto_parallel.cluster_v2 import Device, DeviceMesh, Link +from paddle.distributed.auto_parallel.cluster_v2 import DeviceMesh +from paddle.framework import core class TestDeviceMesh(unittest.TestCase): @@ -38,12 +39,12 @@ class TestDeviceMesh(unittest.TestCase): self.assertEqual(device_mesh.contains(0), True) self.assertEqual(device_mesh.contains(6), False) - dev0 = Device(global_id=0, local_id=0, machine_id=0, type="GPU") - dev1 = Device(global_id=1, 
local_id=1, machine_id=0, type="GPU") - dev2 = Device(global_id=2, local_id=2, machine_id=0, type="GPU") - dev3 = Device(global_id=3, local_id=0, machine_id=1, type="GPU") - dev4 = Device(global_id=4, local_id=1, machine_id=1, type="GPU") - dev5 = Device(global_id=5, local_id=2, machine_id=1, type="GPU") + dev0 = core.Device(global_id=0, local_id=0, machine_id=0, type="GPU") + dev1 = core.Device(global_id=1, local_id=1, machine_id=0, type="GPU") + dev2 = core.Device(global_id=2, local_id=2, machine_id=0, type="GPU") + dev3 = core.Device(global_id=3, local_id=0, machine_id=1, type="GPU") + dev4 = core.Device(global_id=4, local_id=1, machine_id=1, type="GPU") + dev5 = core.Device(global_id=5, local_id=2, machine_id=1, type="GPU") device_mesh.add_device(dev0) device_mesh.add_device(dev1) device_mesh.add_device(dev2) @@ -57,10 +58,10 @@ class TestDeviceMesh(unittest.TestCase): self.assertEqual(device_mesh.device(4), dev4) self.assertEqual(device_mesh.device(5), dev5) - link0 = Link(source_id=0, target_id=1, type="NVL") - link1 = Link(source_id=1, target_id=0, type="NVL") - link2 = Link(source_id=3, target_id=4, type="NVL") - link3 = Link(source_id=4, target_id=5, type="NVL") + link0 = core.Link(source_id=0, target_id=1, type="NVL") + link1 = core.Link(source_id=1, target_id=0, type="NVL") + link2 = core.Link(source_id=3, target_id=4, type="NVL") + link3 = core.Link(source_id=4, target_id=5, type="NVL") device_mesh.add_link(link0) device_mesh.add_link(link1) device_mesh.add_link(link2) @@ -90,7 +91,7 @@ class TestDeviceMesh(unittest.TestCase): self.assertEqual(str(device_mesh), str(device_mesh)) def test_device(self): - device = Device(global_id=0, local_id=1, machine_id=2, type="GPU") + device = core.Device(global_id=0, local_id=1, machine_id=2, type="GPU") device.capability.sflops = 100 device.capability.dflops = 200 device.capability.memory = 32 @@ -107,7 +108,7 @@ class TestDeviceMesh(unittest.TestCase): self.assertEqual(str(device), str(device)) def test_link(self): - link = Link(source_id=0, target_id=1, type="NVL") + link = core.Link(source_id=0, target_id=1, type="NVL") link.capability.bandwidth = 100 link.capability.latency = 1 self.assertEqual(link.source_id, 0) diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index 986b8e93ae6..f0d27edae60 100755 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -69,6 +69,7 @@ from ..fluid.framework import _apply_pass # noqa: F401 from ..fluid.framework import switch_main_program from ..fluid.framework import _set_expected_place # noqa: F401 from ..fluid.framework import Block, Program # noqa: F401 +from ..fluid.framework import IrGraph # noqa: F401 from ..fluid.dygraph import parallel_helper # noqa: F401 from ..fluid.dygraph.parallel import ( _split_tensors, diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py index c9f1ec49704..ac1539d847a 100644 --- a/python/paddle/static/__init__.py +++ b/python/paddle/static/__init__.py @@ -56,11 +56,15 @@ from ..fluid.framework import xpu_places # noqa: F401 from ..fluid.framework import mlu_places # noqa: F401 from ..fluid.framework import npu_places # noqa: F401 from ..fluid.framework import Variable # noqa: F401 +from ..fluid.framework import Operator # noqa: F401 +from ..fluid.framework import Parameter # noqa: F401 from ..fluid.framework import ipu_shard_guard # noqa: F401 from ..fluid.framework import set_ipu_shard # noqa: F401 from ..fluid.layers.control_flow import Print # noqa: F401 from 
..fluid.parallel_executor import ParallelExecutor # noqa: F401 from ..fluid.param_attr import WeightNormParamAttr # noqa: F401 +from ..fluid.optimizer import Optimizer # noqa: F401 +from ..fluid.optimizer import Adam # noqa: F401 from ..fluid.optimizer import ExponentialMovingAverage # noqa: F401 from ..fluid.io import save # noqa: F401 from ..fluid.io import load # noqa: F401 diff --git a/python/paddle/utils/unique_name.py b/python/paddle/utils/unique_name.py index e4ac2381479..0aae339f295 100644 --- a/python/paddle/utils/unique_name.py +++ b/python/paddle/utils/unique_name.py @@ -13,6 +13,7 @@ # limitations under the License. from ..fluid.unique_name import generate # noqa: F401 +from ..fluid.unique_name import generate_with_ignorable_key # noqa: F401 from ..fluid.unique_name import guard # noqa: F401 from ..fluid.unique_name import switch # noqa: F401 -- GitLab
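Taken together, the import surface this patch standardizes on can be exercised end to end. The sketch below is not part of the commit; it assumes a Paddle build that already includes the re-exports added above (paddle.framework.IrGraph and core, paddle.static.Operator and Parameter, paddle.utils.unique_name) and uses an illustrative single-layer static program:

    import paddle
    from paddle.framework import IrGraph, core
    from paddle.static import Operator, Program, Variable, program_guard

    paddle.enable_static()

    main_prog, startup_prog = Program(), Program()
    with program_guard(main_prog, startup_prog):
        x = paddle.static.data(name="x", shape=[4, 8], dtype="float32")
        y = paddle.static.nn.fc(x, size=16)

    # Variables and operators now come from paddle.static rather than paddle.fluid.framework.
    assert isinstance(x, Variable)
    assert all(isinstance(op, Operator) for op in main_prog.global_block().ops)

    # The IR graph wrapper that dist_context.py and the quantization pass now build
    # via paddle.framework instead of paddle.fluid.framework.
    graph = IrGraph(core.Graph(main_prog.desc), for_test=True)
    print(len(graph.all_op_nodes()))
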