Unverified commit c70fe47c, authored by Yulong Ao and committed by GitHub

[Auto Parallel] Remove some deprecated fluid APIs (#49099)

* [Auto Parallel] Remove some fluid APIs

* [Auto Parallel] Fix the wrong import

* [Auto Parallel] Remove unnecessary comments

* [Auto Parallel] Fix the importing bug
Parent daea892c
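
Editor's note: the commit applies one mechanical substitution across the files below, replacing imports from the deprecated paddle.fluid namespace with their public counterparts. A minimal sketch of the mapping, assembled from the replacements shown in this diff and not itself part of the commit:

    # Old, deprecated fluid-based imports removed by this commit:
    #   from paddle.fluid import core, unique_name
    #   from paddle.fluid.framework import Variable, Program, Operator
    # Public equivalents used instead:
    from paddle.framework import core                      # C++ core bindings
    from paddle.utils import unique_name                   # unique-name generator
    from paddle.static import Variable, Program, Operator  # static-graph types
    from paddle.static import default_main_program, program_guard

The same pattern repeats in every hunk that follows; each hunk is shown in unified-diff form (removed lines prefixed with -, added lines with +).
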
@@ -16,9 +16,7 @@ from enum import IntEnum, unique
 import numpy as np
-from paddle.fluid import core
-from paddle.fluid.core import Device # noqa: F401
-from paddle.fluid.core import Link # noqa: F401
+from paddle.framework import core
 @unique
...
@@ -16,7 +16,7 @@ import copy
 import logging
 from paddle.distributed.fleet.meta_optimizers.common import OpRole
-from paddle.fluid import core
+from paddle.framework import core
 from .dist_attribute import OperatorDistAttr, TensorDistAttr
 from .dist_context import _node_id
...
@@ -16,7 +16,7 @@ from functools import reduce
 import paddle
 from paddle.distributed.auto_parallel.dist_tensor import DistributedTensor
-from paddle.fluid.framework import Variable
+from paddle.static import Variable
 from .base_cost import Cost
...
@@ -20,7 +20,7 @@ import numpy as np
 import paddle
 from paddle.distributed.fleet.meta_optimizers.common import OpRole
-from paddle.fluid import core
+from paddle.framework import core
 SUCC = 0 # successor
 PRED = 1 # predecessor
...
@@ -16,8 +16,7 @@ import copy
 from collections import defaultdict
 from paddle.distributed.passes import PassContext
-from paddle.fluid import core, framework
-from paddle.fluid.framework import set_flags
+from paddle.framework import IrGraph, core, set_flags
 from .dist_op import DistributedOperator
 from .dist_tensor import DistributedTensor
@@ -437,7 +436,7 @@ class DistributedContext:
 if with_graph:
 set_flags({"FLAGS_convert_all_blocks": True})
-self._serial_graph = framework.IrGraph(
+self._serial_graph = IrGraph(
 core.Graph(self._serial_main_program.desc)
 )
 self._init_dist_attr_for_graph()
...
@@ -15,7 +15,7 @@
 import copy
 import paddle
-from paddle.fluid.framework import Variable
+from paddle.static import Variable
 from .dist_attribute import OperatorDistAttr
 from .utils import (
@@ -303,7 +303,7 @@ class DistributedOperatorHelper:
 tensor_to_dims_mapping[arg.name] = self._in_dims_mappings[index]
 index += 1
-default_prog = paddle.fluid.default_main_program()
+default_prog = paddle.static.default_main_program()
 cur_block = default_prog.current_block()
 op_size = len(cur_block.ops)
 output = self._serial_op(*args, **kwargs)
...
@@ -21,8 +21,7 @@ import re
 import numpy as np
 import paddle
-from paddle import fluid
-from paddle.fluid import core
+from paddle.framework import core
 from ..utils.log_utils import get_logger
 from .process_group import _g_process_group_map
@@ -167,7 +166,7 @@ class DistributedSaver:
 dist_main_prog = kwargs.get('program', None)
 if not dist_main_prog:
-dist_main_prog = fluid.default_main_program()
+dist_main_prog = paddle.static.default_main_program()
 global_block = dist_main_prog.global_block()
 ops = global_block.ops
...
@@ -16,7 +16,8 @@ import copy
 import inspect
 import paddle
-from paddle.fluid.framework import Block, Parameter, Variable
+from paddle.framework import Block
+from paddle.static import Parameter, Variable
 from .dist_attribute import TensorDistAttr
 from .utils import __no_shape_var_type__, _linear_idx2coordinate
...
@@ -24,17 +24,16 @@ import numpy as np
 import paddle
 import paddle.distributed.auto_parallel.utils as auto_utils
 import paddle.utils as utils
-from paddle import fluid, static
+from paddle import static
 from paddle.distributed import fleet
-from paddle.fluid import Variable, core
 from paddle.fluid.dygraph.parallel import ParallelEnv
-from paddle.fluid.executor import _to_name_str, global_scope
-from paddle.fluid.framework import IrGraph, Operator
-from paddle.fluid.framework import _current_expected_place as _get_device
-from paddle.fluid.framework import in_dygraph_mode
+from paddle.fluid.executor import _to_name_str
 from paddle.fluid.layers.utils import flatten
+from paddle.framework import IrGraph
+from paddle.framework import _current_expected_place as _get_device
+from paddle.framework import core, in_dygraph_mode
 from paddle.metric import Metric
-from paddle.static import InputSpec
+from paddle.static import InputSpec, Operator, Variable, global_scope
 from ..utils.log_utils import get_logger
 from .callbacks import config_callbacks
@@ -151,11 +150,11 @@ class Engine:
 if optimizer and not isinstance(
 optimizer,
-(paddle.optimizer.Optimizer, paddle.fluid.optimizer.Optimizer),
+(paddle.optimizer.Optimizer, paddle.static.Optimizer),
 ):
 raise TypeError(
 "'optimizer' must be object of class `paddle.optimizer.Optimizer`"
-" or `paddle.fluid.optimizer.Optimizer`."
+" or `paddle.static.Optimizer`."
 )
 self._optimizer = auto_utils.validate_opt(optimizer)
 self._orig_optimizer = copy.deepcopy(self._optimizer)
@@ -769,8 +768,8 @@ class Engine:
 process_group.instantiate()
 self._place = _get_device()
-if isinstance(self._place, fluid.CUDAPlace):
-self._place = fluid.CUDAPlace(ParallelEnv().dev_id)
+if isinstance(self._place, paddle.framework.CUDAPlace):
+self._place = paddle.framework.CUDAPlace(ParallelEnv().dev_id)
 if self._strategy.seed:
 paddle.seed(self._strategy.seed + self._dp_ranks[0])
...
@@ -15,11 +15,10 @@
 import logging
 from collections import defaultdict
-from paddle.fluid.executor import global_scope
-from paddle.fluid.framework import Parameter, program_guard
 from paddle.jit import not_to_static, to_static
 from paddle.jit.dy2static.program_translator import StaticFunction
 from paddle.nn import Layer
+from paddle.static import Parameter, global_scope, program_guard
 from .converter import Converter
 from .utils import get_logger, to_list
...
@@ -207,7 +207,7 @@ def recompute(op):
 self._op = op
 def __call__(self, *args, **kwargs):
-default_prog = paddle.fluid.default_main_program()
+default_prog = paddle.static.default_main_program()
 cur_block = default_prog.current_block()
 op_size = len(cur_block.ops)
 output = self._op(*args, **kwargs)
...
@@ -16,7 +16,7 @@ from paddle.distributed.auto_parallel.process_group import (
 get_world_process_group,
 )
 from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
-from paddle.fluid import core
+from paddle.framework import core
 from ..dist_attribute import OperatorDistAttr
 from ..process_group import new_process_group
...
@@ -17,8 +17,9 @@ from paddle.distributed.auto_parallel.cost.comm_op_cost import (
 IdentityOpCost,
 )
 from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
-from paddle.fluid import core, unique_name
 from paddle.fluid.data_feeder import check_dtype, check_variable_and_dtype
+from paddle.framework import core
+from paddle.utils import unique_name
 from ..cost import (
 EmbeddingGradOpCost,
...
@@ -19,8 +19,9 @@ from paddle.distributed.auto_parallel.cost.comm_op_cost import (
 IdentityOpCost,
 )
 from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
-from paddle.fluid import core, unique_name
 from paddle.fluid.data_feeder import check_dtype, check_variable_and_dtype
+from paddle.framework import core
+from paddle.utils import unique_name
 from ..cost import (
 MatmulGradOpCost,
...
@@ -14,9 +14,9 @@
 import copy
-from paddle.fluid import core
 from paddle.fluid.data_feeder import check_dtype, check_variable_and_dtype
-from paddle.fluid.framework import Operator
+from paddle.framework import core
+from paddle.static import Operator
 from ..dist_attribute import OperatorDistAttr, TensorDistAttr
 from ..process_group import new_process_group
...
@@ -24,11 +24,10 @@ import sys
 import time
 import paddle
-import paddle.fluid.core as core
 from paddle.distributed.passes import PassContext, new_pass
 from paddle.distributed.utils.log_utils import get_logger
-from paddle.fluid import program_guard
-from paddle.fluid.backward import append_backward
+from paddle.framework import core
+from paddle.static import append_backward, program_guard
 from .cluster import Cluster
 from .completion import Completer
...
@@ -17,9 +17,8 @@ import logging
 import time
 from paddle.distributed.passes import new_pass
-from paddle.fluid import program_guard
-from paddle.fluid.backward import append_backward
-from paddle.fluid.framework import unique_name
+from paddle.static import append_backward, program_guard
+from paddle.utils import unique_name
 from ..utils.log_utils import get_logger
 from .partitioner import Partitioner
...
@@ -14,13 +14,13 @@
 import copy
-import paddle.fluid as fluid
+import paddle
 from paddle.distributed.auto_parallel.dist_context import DistributedContext
 from paddle.distributed.auto_parallel.operators.common import (
 get_distributed_operator_impl_container,
 )
-from paddle.fluid import core
-from paddle.fluid.framework import Parameter, Program
+from paddle.framework import Program, core
+from paddle.static import Parameter
 from .dist_attribute import OperatorDistAttr
 from .operators.common import BACKWARD_ONLY_DIST_OPS
@@ -52,12 +52,12 @@ class Partitioner:
 def __init__(self, dist_context, rank_id=0):
 """
 Args:
-dist_context (paddle.fluid.DistributedContext): used to access the distributed_attr of var & op, every Partitioner object could maintain its own DistributedContext member, and partition program base on that shard scenario.
+dist_context (DistributedContext): used to access the distributed_attr of var & op, every Partitioner object could maintain its own DistributedContext member, and partition program base on that shard scenario.
 rank_id (int): global rank id to which the partitioned distributed program belong.
 """
 if not isinstance(dist_context, DistributedContext):
 raise TypeError(
-"dist_context be paddle.fluid.DistributedContext, got %s here"
+"dist_context be DistributedContext, got %s here"
 % type(dist_context)
 )
@@ -71,7 +71,7 @@ class Partitioner:
 ):
 if not isinstance(serial_main_program, (Program)):
 raise TypeError(
-"main_program be paddle.fluid.framework.program, got %s here"
+"main_program be paddle.framework.Program, got %s here"
 % type(serial_main_program)
 )
@@ -113,11 +113,11 @@ class Partitioner:
 if not isinstance(serial_startup_program, (Program)):
 raise TypeError(
-"dist_context be paddle.fluid.framework.program, got %s here"
+"dist_context be paddle.framework.Program, got %s here"
 % type(serial_startup_program)
 )
-partitioned_startup_prog = fluid.Program()
+partitioned_startup_prog = paddle.framework.Program()
 ref_block = serial_main_program.global_block()
 target_block = partitioned_startup_prog.global_block()
 var2shape = {}
@@ -183,7 +183,7 @@ class Partitioner:
 2. replace local op with corresponding dist op
 """
-partitioned_main_prog = fluid.Program()
+partitioned_main_prog = paddle.framework.Program()
 dist_op_context = self._dist_context.dist_op_context
 dist_op_context.dst_main_program = partitioned_main_prog
...
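
Editor's note: the Partitioner docstring in the hunks above describes the constructor contract, a DistributedContext plus a global rank id. A hypothetical construction sketch under those assumptions, written for illustration and not taken from the commit (the no-argument DistributedContext() construction is an assumption):

    from paddle.distributed.auto_parallel.dist_context import DistributedContext
    from paddle.distributed.auto_parallel.partitioner import Partitioner

    # Each Partitioner keeps its own DistributedContext and produces the
    # program partition that belongs to the given global rank.
    dist_context = DistributedContext()  # assumed default construction
    partitioner = Partitioner(dist_context, rank_id=0)
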
@@ -15,9 +15,8 @@
 from collections import OrderedDict
 import paddle
-import paddle.fluid.core as core
 from paddle import _legacy_C_ops
-from paddle.fluid.framework import in_dygraph_mode
+from paddle.framework import core, in_dygraph_mode
 from ...fluid.layers.tensor import fill_constant
 from ..collective import _get_global_env, _new_ring_id
...
@@ -157,7 +157,7 @@ class ProcessMesh(core.ProcessMesh):
 def __enter__(self):
 set_current_process_mesh(self)
-default_prog = paddle.fluid.default_main_program()
+default_prog = paddle.static.default_main_program()
 cur_block = default_prog.current_block()
 self._old_var_names = list(cur_block.vars.keys())
 self._old_op_size = len(cur_block.ops)
@@ -166,7 +166,7 @@ class ProcessMesh(core.ProcessMesh):
 from .dist_op import DistributedOperator
 from .dist_tensor import DistributedTensor
-default_prog = paddle.fluid.default_main_program()
+default_prog = paddle.static.default_main_program()
 cur_block = default_prog.current_block()
 new_var_names = list(cur_block.vars.keys())
 new_op_size = len(cur_block.ops)
...
@@ -14,7 +14,7 @@
 import numpy as np
-from paddle.fluid import core
+from paddle.framework import core
 class ProcessMesh(core.ProcessMesh):
...
@@ -15,11 +15,9 @@
 from functools import reduce
 import paddle
-import paddle.fluid.core as core
 import paddle.fluid.layers.utils as utils
 from paddle.distributed.fleet.meta_optimizers.common import OpRole
-from paddle.fluid.framework import OpProtoHolder, Program
-from paddle.fluid.layer_helper import LayerHelper
+from paddle.framework import LayerHelper, OpProtoHolder, Program, core
 from paddle.utils import unique_name
 from .cost import (
@@ -310,7 +308,7 @@ class Inserter:
 @staticmethod
 def insert_cast_op(block, idx, tensor, op_role, tensor_type):
 # to avoid name conflict with framework
-new_var_name = paddle.fluid.unique_name.generate_with_ignorable_key(
+new_var_name = paddle.utils.unique_name.generate_with_ignorable_key(
 ".".join(["cast@RESHARD", 'tmp'])
 )
 out = block.create_var(
@@ -380,7 +378,7 @@ class Inserter:
 def insert_reset_lod_op(block, idx, X, Y, op_role):
 """Insert reset_lod op into block at the given index."""
-new_var_name = paddle.fluid.unique_name.generate_with_ignorable_key(
+new_var_name = paddle.utils.unique_name.generate_with_ignorable_key(
 ".".join(["reset_lod@RESHARD", 'tmp'])
 )
 reset_lod_out = block.create_var(
@@ -412,7 +410,7 @@ class Inserter:
 helper = LayerHelper('concat@RESHARD', **locals())
 with paddle.static.program_guard(block.program):
 out = block.create_var(
-name=paddle.fluid.unique_name.generate_with_ignorable_key(
+name=paddle.utils.unique_name.generate_with_ignorable_key(
 ".".join([helper.name, 'tmp'])
 ),
 dtype=tensors[0].dtype,
@@ -484,7 +482,7 @@ class Inserter:
 with paddle.static.program_guard(block.program):
 outs = [
 block.create_var(
-name=paddle.fluid.unique_name.generate_with_ignorable_key(
+name=paddle.utils.unique_name.generate_with_ignorable_key(
 ".".join(['split@RESHARD', 'tmp'])
 ),
 dtype=tensor.dtype,
@@ -550,7 +548,7 @@ class Inserter:
 with paddle.static.program_guard(block.program):
 outs = [
 block.create_var(
-name=paddle.fluid.unique_name.generate_with_ignorable_key(
+name=paddle.utils.unique_name.generate_with_ignorable_key(
 ".".join([helper.name, 'tmp'])
 ),
 dtype=tensor.dtype,
@@ -576,7 +574,7 @@ class Inserter:
 # use paddle.int64 as dtype
 with paddle.static.program_guard(block.program):
 out = block.create_var(
-name=paddle.fluid.unique_name.generate_with_ignorable_key(
+name=paddle.utils.unique_name.generate_with_ignorable_key(
 ".".join([helper.name, 'tmp'])
 ),
 dtype=paddle.int64,
@@ -650,7 +648,7 @@ class Inserter:
 helper = LayerHelper(op_type + "@RESHARD", **locals())
 with paddle.static.program_guard(block.program):
 allgather_out = block.create_var(
-name=paddle.fluid.unique_name.generate_with_ignorable_key(
+name=paddle.utils.unique_name.generate_with_ignorable_key(
 ".".join([helper.name, 'tmp'])
 ),
 dtype=tensor.dtype,
@@ -695,7 +693,7 @@ class Inserter:
 helper = LayerHelper(op_type + "@RESHARD", **locals())
 with paddle.static.program_guard(block.program):
 c_concat_out = block.create_var(
-name=paddle.fluid.unique_name.generate_with_ignorable_key(
+name=paddle.utils.unique_name.generate_with_ignorable_key(
 ".".join([helper.name, 'tmp'])
 ),
 dtype=tensor.dtype,
...
@@ -41,8 +41,8 @@ from paddle.distributed.auto_parallel.utils import (
 set_grad_var_shape,
 )
 from paddle.distributed.passes import PassContext, new_pass
-from paddle.fluid import program_guard, unique_name
-from paddle.fluid.backward import append_backward
+from paddle.static import append_backward, program_guard
+from paddle.utils import unique_name
 from ..utils import get_logger
 from .algorithms import new_algorithm
...
@@ -28,7 +28,8 @@ from paddle.distributed.auto_parallel.process_group import (
 new_process_group,
 )
 from paddle.distributed.collective import _get_global_env
-from paddle.fluid.framework import Operator, Program, _current_expected_place
+from paddle.framework import Program, _current_expected_place
+from paddle.static import Operator
 paddle.enable_static()
...
@@ -22,9 +22,9 @@ from functools import reduce
 import numpy as np
 import paddle
-from paddle.fluid.framework import Variable
 from paddle.fluid.io import is_belong_to_optimizer, is_parameter
 from paddle.framework import core
+from paddle.static import Variable
 from .dist_attribute import OperatorDistAttr, TensorDistAttr
 from .process_group import get_all_process_groups
@@ -619,7 +619,7 @@ def save_distributed_checkpoint(
 """
 from .dist_context import get_default_distributed_context
-assert isinstance(program, paddle.fluid.framework.Program)
+assert isinstance(program, paddle.static.Program)
 assert isinstance(is_integrated, bool)
 if dist_context is None:
 dist_context = get_default_distributed_context()
@@ -702,7 +702,7 @@ def load_checkpoint_into_program(
 """
 from .dist_context import get_default_distributed_context
-assert isinstance(program, paddle.fluid.framework.Program)
+assert isinstance(program, paddle.static.Program)
 assert _check_valid_path(
 checkpoint_path
 ), "'checkpoint_path' cannot be None."
@@ -731,7 +731,7 @@ def load_parameter_into_program(param_dict, program):
 program(Program): the program to be updated
 """
 assert isinstance(param_dict, dict)
-assert program and isinstance(program, paddle.fluid.framework.Program)
+assert program and isinstance(program, paddle.static.Program)
 if not param_dict:
 return
 program.set_state_dict(param_dict)
@@ -818,7 +818,7 @@ def get_dist_attr(program, dist_context=None):
 """
 from .dist_context import get_default_distributed_context
-assert isinstance(program, paddle.fluid.framework.Program)
+assert isinstance(program, paddle.static.Program)
 if dist_context is None:
 dist_context = get_default_distributed_context()
 dist_attr = {}
@@ -1845,7 +1845,7 @@ def get_var_numel(var):
 def get_lr(optimizer):
 if isinstance(optimizer, paddle.optimizer.Optimizer):
 return optimizer.get_lr()
-elif isinstance(optimizer, paddle.fluid.optimizer.Optimizer):
+elif isinstance(optimizer, paddle.static.Optimizer):
 if isinstance(optimizer._learning_rate, float):
 return optimizer._learning_rate
 else:
@@ -1853,9 +1853,7 @@ def get_lr(optimizer):
 else:
 raise TypeError(
 "'optimizer' must be object of class `paddle.optimizer.Optimizer`"
-" or `paddle.fluid.optimizer.Optimizer`, but got {}.".format(
-type(optimizer)
-)
+" or `paddle.static.Optimizer`, but got {}.".format(type(optimizer))
 )
...
@@ -23,7 +23,6 @@ from paddle.distributed.auto_parallel.utils import (
 set_var_dist_attr,
 )
 from paddle.distributed.fleet.meta_optimizers.common import OpRole
-from paddle.fluid import unique_name
 from paddle.fluid.contrib.mixed_precision.fp16_utils import (
 AutoMixedPrecisionLists,
 _dtype_to_str,
@@ -38,6 +37,7 @@ from paddle.fluid.contrib.mixed_precision.fp16_utils import (
 )
 from paddle.fluid.data_feeder import check_type, check_variable_and_dtype
 from paddle.framework import core
+from paddle.utils import unique_name
 from ..auto_parallel.process_mesh import ProcessMesh
 from ..auto_parallel.utils import is_backward_op, is_forward_op, is_loss_op
@@ -523,7 +523,7 @@ def _update_backward_cast_ops(params_grads, dist_context):
 # add new op in the python and cpp at the same time
 new_op_desc = main_block.desc.append_op()
 new_op_desc.copy_from(op.desc)
-new_op = paddle.fluid.framework.Operator(
+new_op = paddle.static.Operator(
 block=main_block,
 desc=new_op_desc,
 type=None,
@@ -898,7 +898,7 @@ class AMPPass(PassBase):
 OP_ROLE_KEY, core.op_proto_and_checker_maker.OpRole.Backward
 )
 elementwise_mul_grad_op_desc._set_attr('axis', -1)
-elementwise_mul_grad_op = paddle.fluid.framework.Operator(
+elementwise_mul_grad_op = paddle.static.Operator(
 main_block, elementwise_mul_grad_op_desc
 )
 main_block.ops.insert(loss_op_idx + 3, elementwise_mul_grad_op)
...
@@ -29,9 +29,9 @@ from paddle.distributed.auto_parallel.utils import (
 ring_id_to_process_group,
 )
 from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
-from paddle.fluid import unique_name
 from paddle.fluid.executor import _is_enable_standalone_executor
-from paddle.fluid.framework import default_main_program
+from paddle.static import default_main_program
+from paddle.utils import unique_name
 from .pass_base import PassBase, PassType, register_pass
...
@@ -26,7 +26,6 @@ from paddle.distributed.auto_parallel.utils import (
 set_var_dist_attr,
 )
 from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
-from paddle.fluid import unique_name
 from paddle.fluid.contrib.mixed_precision.fp16_utils import (
 AutoMixedPrecisionLists,
 _dtype_to_str,
@@ -35,8 +34,9 @@ from paddle.fluid.contrib.mixed_precision.fp16_utils import (
 _valid_types,
 )
 from paddle.fluid.data_feeder import check_type, check_variable_and_dtype
-from paddle.fluid.framework import default_main_program, default_startup_program
 from paddle.framework import core
+from paddle.static import default_main_program, default_startup_program
+from paddle.utils import unique_name
 from ..auto_parallel.process_mesh import ProcessMesh
 from .auto_parallel_amp import AMPPass
@@ -790,7 +790,7 @@ class FP16Pass(AMPPass):
 # all_infs = paddle.fluid.layers.concat(found_infs)
 all_infs = block.create_var(
-name=paddle.fluid.unique_name.generate_with_ignorable_key(
+name=paddle.utils.unique_name.generate_with_ignorable_key(
 ".".join(['concat', 'tmp'])
 ),
 dtype=found_infs[0].dtype,
@@ -821,7 +821,7 @@ class FP16Pass(AMPPass):
 # found_inf = paddle.fluid.layers.reduce_any(all_infs)
 found_inf = block.create_var(
-name=paddle.fluid.unique_name.generate_with_ignorable_key(
+name=paddle.utils.unique_name.generate_with_ignorable_key(
 ".".join(['reduce_any', 'tmp'])
 ),
 dtype=all_infs.dtype,
@@ -867,7 +867,8 @@ class FP16Pass(AMPPass):
 if self.get_attr("use_optimizer_fp16"):
 base_opt._multi_precision = False
 if isinstance(
-base_opt, (paddle.fluid.optimizer.Adam, paddle.optimizer.AdamW)
+base_opt,
+(paddle.static.Adam, paddle.optimizer.AdamW),
 ):
 with main_program._optimized_guard([]):
 # found_inf = paddle.tensor.creation._memcpy(
...
@@ -26,8 +26,8 @@ from paddle.distributed.auto_parallel.utils import (
 )
 from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
 from paddle.fluid import layers
-from paddle.fluid.framework import device_guard
 from paddle.framework import core
+from paddle.static import device_guard
 from .pass_base import PassBase, PassType, register_pass
...
@@ -17,8 +17,8 @@ import logging
 import numpy as np
 import paddle
-from paddle.fluid import core, framework
 from paddle.fluid.dygraph.parallel import ParallelEnv
+from paddle.framework import IrGraph, core
 from paddle.static.quantization import (
 AddQuantDequantForInferencePass,
 AddQuantDequantPassV2,
@@ -72,7 +72,7 @@ class QuantizationPass(PassBase):
 # TODO: scope and place will be removed,
 # cause params should be initialized by engine module.
 scope = paddle.static.global_scope()
-place = paddle.fluid.CUDAPlace(ParallelEnv().dev_id)
+place = paddle.framework.CUDAPlace(ParallelEnv().dev_id)
 # 0. record the relation among blocks
 parent_idx_dict = dict()
@@ -81,7 +81,7 @@ class QuantizationPass(PassBase):
 is_test = True if mode != "train" else False
 # 1. Program convert to Graph, and this pass is only for train mode
-main_graph = framework.IrGraph(
+main_graph = IrGraph(
 core.Graph(main_program.desc), for_test=mode != "train"
 )
...
@@ -14,8 +14,8 @@
 import logging
+import paddle
 from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
-from paddle.fluid import core, framework, unique_name
 from paddle.fluid.backward import (
 ProgramStats,
 _append_grad_suffix_,
@@ -23,6 +23,8 @@ from paddle.fluid.backward import (
 _get_no_grad_set_name,
 _rename_arg_,
 )
+from paddle.framework import core
+from paddle.utils import unique_name
 from ..auto_parallel.dist_attribute import OperatorDistAttr
 from ..auto_parallel.utils import (
@@ -221,7 +223,8 @@ def _add_needed_descs_to_block(
 result_descs = []
 for desc in descs:
-if isinstance(desc, framework.Operator):
+# if isinstance(desc, framework.Operator):
+if isinstance(desc, paddle.static.Operator):
 desc = desc.desc
 if isinstance(desc, tuple):
 desc = desc[0]
...
@@ -35,10 +35,10 @@ from paddle.distributed.auto_parallel.utils import (
 set_var_dist_attr,
 )
 from paddle.distributed.fleet.meta_optimizers.sharding.utils import get_var_size
-from paddle.fluid import unique_name
 from paddle.fluid.executor import _is_enable_standalone_executor
-from paddle.fluid.framework import default_main_program, default_startup_program
 from paddle.framework import core
+from paddle.static import default_main_program, default_startup_program
+from paddle.utils import unique_name
 from .pass_base import PassBase, register_pass
...
@@ -14,7 +14,8 @@
 import unittest
-from paddle.distributed.auto_parallel.cluster_v2 import Device, DeviceMesh, Link
+from paddle.distributed.auto_parallel.cluster_v2 import DeviceMesh
+from paddle.framework import core
 class TestDeviceMesh(unittest.TestCase):
@@ -38,12 +39,12 @@ class TestDeviceMesh(unittest.TestCase):
 self.assertEqual(device_mesh.contains(0), True)
 self.assertEqual(device_mesh.contains(6), False)
-dev0 = Device(global_id=0, local_id=0, machine_id=0, type="GPU")
-dev1 = Device(global_id=1, local_id=1, machine_id=0, type="GPU")
-dev2 = Device(global_id=2, local_id=2, machine_id=0, type="GPU")
-dev3 = Device(global_id=3, local_id=0, machine_id=1, type="GPU")
-dev4 = Device(global_id=4, local_id=1, machine_id=1, type="GPU")
-dev5 = Device(global_id=5, local_id=2, machine_id=1, type="GPU")
+dev0 = core.Device(global_id=0, local_id=0, machine_id=0, type="GPU")
+dev1 = core.Device(global_id=1, local_id=1, machine_id=0, type="GPU")
+dev2 = core.Device(global_id=2, local_id=2, machine_id=0, type="GPU")
+dev3 = core.Device(global_id=3, local_id=0, machine_id=1, type="GPU")
+dev4 = core.Device(global_id=4, local_id=1, machine_id=1, type="GPU")
+dev5 = core.Device(global_id=5, local_id=2, machine_id=1, type="GPU")
 device_mesh.add_device(dev0)
 device_mesh.add_device(dev1)
 device_mesh.add_device(dev2)
@@ -57,10 +58,10 @@ class TestDeviceMesh(unittest.TestCase):
 self.assertEqual(device_mesh.device(4), dev4)
 self.assertEqual(device_mesh.device(5), dev5)
-link0 = Link(source_id=0, target_id=1, type="NVL")
-link1 = Link(source_id=1, target_id=0, type="NVL")
-link2 = Link(source_id=3, target_id=4, type="NVL")
-link3 = Link(source_id=4, target_id=5, type="NVL")
+link0 = core.Link(source_id=0, target_id=1, type="NVL")
+link1 = core.Link(source_id=1, target_id=0, type="NVL")
+link2 = core.Link(source_id=3, target_id=4, type="NVL")
+link3 = core.Link(source_id=4, target_id=5, type="NVL")
 device_mesh.add_link(link0)
 device_mesh.add_link(link1)
 device_mesh.add_link(link2)
@@ -90,7 +91,7 @@ class TestDeviceMesh(unittest.TestCase):
 self.assertEqual(str(device_mesh), str(device_mesh))
 def test_device(self):
-device = Device(global_id=0, local_id=1, machine_id=2, type="GPU")
+device = core.Device(global_id=0, local_id=1, machine_id=2, type="GPU")
 device.capability.sflops = 100
 device.capability.dflops = 200
 device.capability.memory = 32
@@ -107,7 +108,7 @@ class TestDeviceMesh(unittest.TestCase):
 self.assertEqual(str(device), str(device))
 def test_link(self):
-link = Link(source_id=0, target_id=1, type="NVL")
+link = core.Link(source_id=0, target_id=1, type="NVL")
 link.capability.bandwidth = 100
 link.capability.latency = 1
 self.assertEqual(link.source_id, 0)
...
@@ -69,6 +69,7 @@ from ..fluid.framework import _apply_pass # noqa: F401
 from ..fluid.framework import switch_main_program
 from ..fluid.framework import _set_expected_place # noqa: F401
 from ..fluid.framework import Block, Program # noqa: F401
+from ..fluid.framework import IrGraph # noqa: F401
 from ..fluid.dygraph import parallel_helper # noqa: F401
 from ..fluid.dygraph.parallel import (
 _split_tensors,
...
@@ -56,11 +56,15 @@ from ..fluid.framework import xpu_places # noqa: F401
 from ..fluid.framework import mlu_places # noqa: F401
 from ..fluid.framework import npu_places # noqa: F401
 from ..fluid.framework import Variable # noqa: F401
+from ..fluid.framework import Operator # noqa: F401
+from ..fluid.framework import Parameter # noqa: F401
 from ..fluid.framework import ipu_shard_guard # noqa: F401
 from ..fluid.framework import set_ipu_shard # noqa: F401
 from ..fluid.layers.control_flow import Print # noqa: F401
 from ..fluid.parallel_executor import ParallelExecutor # noqa: F401
 from ..fluid.param_attr import WeightNormParamAttr # noqa: F401
+from ..fluid.optimizer import Optimizer # noqa: F401
+from ..fluid.optimizer import Adam # noqa: F401
 from ..fluid.optimizer import ExponentialMovingAverage # noqa: F401
 from ..fluid.io import save # noqa: F401
 from ..fluid.io import load # noqa: F401
...
@@ -13,6 +13,7 @@
 # limitations under the License.
 from ..fluid.unique_name import generate # noqa: F401
+from ..fluid.unique_name import generate_with_ignorable_key # noqa: F401
 from ..fluid.unique_name import guard # noqa: F401
 from ..fluid.unique_name import switch # noqa: F401
...