From 96180fff65243316d64767a77c5e0536c6655e1b Mon Sep 17 00:00:00 2001
From: Meteor Liu
Date: Fri, 28 Apr 2023 15:47:38 +0800
Subject: [PATCH] replace varbase to relevant name or notes as per the context
 (#53431)

---
 .../sharding/group_sharded_stage3.py          |  12 +-
 python/paddle/distributed/parallel.py         |  30 +++--
 python/paddle/fluid/dygraph/base.py           |   2 +-
 .../tests/unittests/test_eager_run_program.py |   5 +-
 .../tests/unittests/test_imperative_group.py  | 106 +++++++++---------
 .../test_view_op_reuse_allocation.py          |   4 +-
 .../paddle/jit/dy2static/partial_program.py   |  15 ++-
 python/paddle/jit/translated_layer.py         |   6 +-
 test/dygraph_to_static/test_list.py           |   6 +-
 9 files changed, 90 insertions(+), 96 deletions(-)

diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py
index d658fc1e82e..bdf0ec29cd7 100644
--- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py
+++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py
@@ -584,7 +584,7 @@ class GroupShardedStage3(nn.Layer):
                 param, "fw_storage"
             ), f"Find {param.name} don't have fw_storage attribute"
 
-            param.fw_storage = _VarBaseWrapper(param)
+            param.fw_storage = _TensorWrapper(param)
             assert param.fw_storage.grad is None
             param.fw_storage._copy_gradient_from(param.bw_storage)
             update_list.append(param)
@@ -1062,17 +1062,17 @@ def _UnsliceParam(param):
     return param
 
 
-def _VarBaseWrapper(param):
-    varbase = param.fw_storage
+def _TensorWrapper(param):
+    var = param.fw_storage
     tmp_param = EagerParamBase(
-        shape=varbase.shape, dtype=varbase.dtype, name="slice@" + param.name
+        shape=var.shape, dtype=var.dtype, name="slice@" + param.name
     )
-    varbase._share_buffer_to(tmp_param)
+    var._share_buffer_to(tmp_param)
     tmp_param.regularizer = param.regularizer
     tmp_param.optimize_attr['learning_rate'] = param.optimize_attr[
         'learning_rate'
     ]
-    varbase._clear()
+    var._clear()
     return tmp_param
 
 
diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py
index 30a3e82eaf1..1737d0731a9 100644
--- a/python/paddle/distributed/parallel.py
+++ b/python/paddle/distributed/parallel.py
@@ -146,20 +146,18 @@ def sync_params_buffers(
     for _, param in model._obtain_parameters_buffers().items():
         if not isinstance(param, core.eager.Tensor):
             raise TypeError(
-                "The data type of '%s' must be Varbase or eager.Tensor"
-                % param.name
+                "The data type of '%s' must be core.eager.Tensor" % param.name
             )
 
-        # is_distributed param not need to sync when in mp mode
-        if isinstance(param, core.eager.Tensor):
-            if is_model_parallel:
-                if hasattr(param, "is_distributed") and param.is_distributed:
-                    continue
-
-            # NOTE(shenliang03): Support situations that do not require synchronization parameters,
-            # such as moe's expert parameters
-            if getattr(param, "no_sync", False):
+        if is_model_parallel:
+            if hasattr(param, "is_distributed") and param.is_distributed:
                 continue
+
+        # NOTE(shenliang03): Support situations that do not require synchronization parameters,
+        # such as moe's expert parameters
+        if getattr(param, "no_sync", False):
+            continue
+
         if param.type == core.VarDesc.VarType.VOCAB:
             continue
 
@@ -474,14 +472,14 @@ class DataParallel(layers.Layer):
             self.find_unused_parameters,
         )
 
-    def _find_varbase(self, obj):
+    def _find_tensor(self, obj):
         var_type = core.eager.Tensor
         if isinstance(obj, var_type):
             return [obj]
         if isinstance(obj, (list, tuple)):
-            return itertools.chain(*map(self._find_varbase, obj))
+            return itertools.chain(*map(self._find_tensor, obj))
         if isinstance(obj, dict):
-            return itertools.chain(*map(self._find_varbase, obj.values()))
+            return itertools.chain(*map(self._find_tensor, obj.values()))
         return []
 
     @contextmanager
@@ -536,9 +534,7 @@ class DataParallel(layers.Layer):
             and framework._dygraph_tracer()._has_grad
             and self.grad_need_sync
         ):
-            self._reducer.prepare_for_backward(
-                list(self._find_varbase(outputs))
-            )
+            self._reducer.prepare_for_backward(list(self._find_tensor(outputs)))
         return outputs
 
     @deprecated(
diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py
index 825cb00f44c..e76f0f3fa31 100644
--- a/python/paddle/fluid/dygraph/base.py
+++ b/python/paddle/fluid/dygraph/base.py
@@ -125,7 +125,7 @@ def param_guard(parameters):
 
 def _convert_into_variable(tensor):
     """
-    Convert Varbase into Variable.
+    Convert Tensor into Variable.
     """
     if isinstance(tensor, core.eager.Tensor):
         # Check whether has been created before.
diff --git a/python/paddle/fluid/tests/unittests/test_eager_run_program.py b/python/paddle/fluid/tests/unittests/test_eager_run_program.py
index 416a692038c..3ee2aaa9da8 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_run_program.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_run_program.py
@@ -57,15 +57,14 @@ def _append_backward_desc(main_program, outs):
 def _create_out(var):
     assert isinstance(var, Variable)
     var_desc = var.desc
-    varbase = None
-    var_base = core.eager.Tensor(
+    out = core.eager.Tensor(
         var_desc.dtype(),
         var_desc.shape(),
         var_desc.name(),
         var_desc.type(),
         False,
     )
-    return var_base
+    return out
 
 
 @switch_to_static_graph
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_group.py b/python/paddle/fluid/tests/unittests/test_imperative_group.py
index da0c5af627b..2a1fdad88bb 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_group.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_group.py
@@ -20,7 +20,7 @@ from paddle.fluid.framework import in_dygraph_mode
 
 
 class TestDataParallelGroup(unittest.TestCase):
-    def create_varbase(self, dtype, shape):
+    def _create_var(self, dtype, shape):
         return paddle.rand(shape=shape, dtype=dtype)
 
     def assign_group_by_size(self, *args):
@@ -30,10 +30,10 @@ class TestDataParallelGroup(unittest.TestCase):
     def test_construct_group0(self):
         # one dtype & one limit capability
         var_list = []
-        var_list.append(self.create_varbase("float32", [2, 50]))
-        var_list.append(self.create_varbase("float32", [2, 100]))
-        var_list.append(self.create_varbase("float32", [2, 50]))
-        var_list.append(self.create_varbase("float32", [2, 25]))
+        var_list.append(self._create_var("float32", [2, 50]))
+        var_list.append(self._create_var("float32", [2, 100]))
+        var_list.append(self._create_var("float32", [2, 50]))
+        var_list.append(self._create_var("float32", [2, 25]))
         res = self.assign_group_by_size(
             var_list, [False, False, False, False], [400]
         )
@@ -42,12 +42,12 @@ class TestDataParallelGroup(unittest.TestCase):
     def test_construct_group1(self):
         # multi dtype & one limit capability
         var_list = []
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
         res = self.assign_group_by_size(
             var_list, [False, False, False, False, False, False], [400]
         )
@@ -56,10 +56,10 @@ class TestDataParallelGroup(unittest.TestCase):
     def test_construct_group2(self):
         # one dtype & multi limit capability
         var_list = []
-        var_list.append(self.create_varbase("float32", [2, 50]))
-        var_list.append(self.create_varbase("float32", [2, 50]))
-        var_list.append(self.create_varbase("float32", [2, 50]))
-        var_list.append(self.create_varbase("float32", [2, 50]))
+        var_list.append(self._create_var("float32", [2, 50]))
+        var_list.append(self._create_var("float32", [2, 50]))
+        var_list.append(self._create_var("float32", [2, 50]))
+        var_list.append(self._create_var("float32", [2, 50]))
         res = self.assign_group_by_size(
             var_list, [False, False, False, False], [400, 800]
         )
@@ -68,12 +68,12 @@ class TestDataParallelGroup(unittest.TestCase):
     def test_construct_group3(self):
         # multi dtype & multi limit capability
         var_list = []
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
         res = self.assign_group_by_size(
             var_list, [False, False, False, False, False, False], [200, 400]
         )
@@ -82,12 +82,12 @@ class TestDataParallelGroup(unittest.TestCase):
     def test_construct_group4(self):
         # multi dtype & zero limit capability
         var_list = []
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
         res = self.assign_group_by_size(
             var_list, [False, False, False, False, False, False], [0]
         )
@@ -96,12 +96,12 @@ class TestDataParallelGroup(unittest.TestCase):
     def test_construct_group5(self):
         # multi dtype & infinite capability
         var_list = []
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
         res = self.assign_group_by_size(
             var_list, [False, False, False, False, False, False], [10000]
         )
@@ -111,16 +111,16 @@ class TestDataParallelGroup(unittest.TestCase):
         # multi dtype & limit capability & multi tensor type
         var_list = []
         var_list.append(
-            self.create_varbase(
+            self._create_var(
                 "float32",
                 [1, 50],
             )
         )
-        var_list.append(self.create_varbase("float64", [1, 25]))
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
+        var_list.append(self._create_var("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
         res = self.assign_group_by_size(
             var_list, [True, False, False, False, False, True], [400]
         )
@@ -129,12 +129,12 @@ class TestDataParallelGroup(unittest.TestCase):
     def test_construct_group7(self):
         # multi dtype & multi limit capability & multi tensor type
         var_list = []
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
-        var_list.append(self.create_varbase("float32", [1, 50]))
-        var_list.append(self.create_varbase("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
+        var_list.append(self._create_var("float32", [1, 50]))
+        var_list.append(self._create_var("float64", [1, 25]))
         res = self.assign_group_by_size(
             var_list, [True, False, False, False, False, True], [200, 400]
         )
@@ -143,10 +143,10 @@ class TestDataParallelGroup(unittest.TestCase):
     def test_construct_group8(self):
         # one dtype & one limit capability & have tensor_indices
        var_list = []
-        var_list.append(self.create_varbase("float32", [2, 25]))
-        var_list.append(self.create_varbase("float32", [2, 100]))
-        var_list.append(self.create_varbase("float32", [2, 50]))
-        var_list.append(self.create_varbase("float32", [2, 25]))
+        var_list.append(self._create_var("float32", [2, 25]))
+        var_list.append(self._create_var("float32", [2, 100]))
+        var_list.append(self._create_var("float32", [2, 50]))
+        var_list.append(self._create_var("float32", [2, 25]))
         res = self.assign_group_by_size(
             var_list, [False, False, False, False], [400], [3, 0, 1, 2]
         )
@@ -155,10 +155,10 @@ class TestDataParallelGroup(unittest.TestCase):
     def test_construct_group9(self):
         # one dtype & one limit capability & have tensor_indices
         var_list = []
-        var_list.append(self.create_varbase("float32", [2, 25]))
-        var_list.append(self.create_varbase("float32", [2, 25]))
-        var_list.append(self.create_varbase("float32", [2, 25]))
-        var_list.append(self.create_varbase("float32", [2, 1000]))
+        var_list.append(self._create_var("float32", [2, 25]))
+        var_list.append(self._create_var("float32", [2, 25]))
+        var_list.append(self._create_var("float32", [2, 25]))
+        var_list.append(self._create_var("float32", [2, 1000]))
         res = self.assign_group_by_size(
             var_list, [False, False, False, True], [300], [1, 0, 2, 3]
         )
diff --git a/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py b/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py
index ca261f63b00..cee82945410 100644
--- a/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py
+++ b/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py
@@ -21,8 +21,8 @@ import paddle
 
 # NOTE(pangyoki): Tensor View Strategy.
 # Refer to `op_function_generator.py`.
-# For view op, a new output varbase will be created, and this varbase will
-# reuse the input varbase's allocation.
+# For view op, a new output tensor will be created, and this tensor will
+# reuse the input tensor's allocation.
 # View APIs include: `squeeze`, `unsqueeze`, `reshape`, `flatten`, `detach`
 class TestDygraphViewReuseAllocation(unittest.TestCase):
     def setUp(self):
diff --git a/python/paddle/jit/dy2static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py
index 2a27c0eb15a..ad2e62b9e04 100644
--- a/python/paddle/jit/dy2static/partial_program.py
+++ b/python/paddle/jit/dy2static/partial_program.py
@@ -913,27 +913,26 @@ class PartialProgramLayer:
                 input_vars.append(var)
 
         # mapping from name(string) -> Tensor
-        out_varbase_map = {}
+        out_tensor_map = {}
 
         def create_out(var_id):
             var = self._outputs[var_id]
             assert isinstance(var, framework.Variable)
             var_desc = var.desc
-            varbase = None
 
-            if var_desc.name() in out_varbase_map:
-                return out_varbase_map[var_desc.name()]
+            if var_desc.name() in out_tensor_map:
+                return out_tensor_map[var_desc.name()]
 
-            var_base = core.eager.Tensor(
+            out = core.eager.Tensor(
                 var_desc.dtype(),
                 var_desc.shape(),
                 var_desc.name(),
                 var_desc.type(),
                 False,
             )
-            var_base.stop_gradient = var.stop_gradient
-            out_varbase_map[var_desc.name()] = var_base
-            return var_base
+            out.stop_gradient = var.stop_gradient
+            out_tensor_map[var_desc.name()] = out
+            return out
 
         # Create Tensor to receive output data.
         out_vars = list(map(create_out, self._outputs.var_ids))
diff --git a/python/paddle/jit/translated_layer.py b/python/paddle/jit/translated_layer.py
index 9f2fe5ee41b..06a89ce3640 100644
--- a/python/paddle/jit/translated_layer.py
+++ b/python/paddle/jit/translated_layer.py
@@ -644,7 +644,7 @@ class _ProgramHolder:
 # and executor, executes this program. Key points:
 #
 # 1. Data Sharing:
-#   The varBase of the dynamic graph is not in the scope, so before the op
+#   The variable/parameter of the dynamic graph is not in the scope, so before the op
 #   executes the program internally, create persistent variables with the
 #   same name as feed, parameters, and fetch in the scope, and share the
 #   LoDTensor of the op input.
@@ -668,7 +668,7 @@ def _load_persistable_vars_by_program(
     for each_var in persistable_vars:
         orig_each_name = program_holder._suffix_varname_dict[each_var.name()]
         if _is_parameter(each_var, program_holder.infer_program):
-            # create output varbase
+            # create output param
             new_var = framework.EagerParamBase(
                 shape=each_var.shape(),
                 dtype=each_var.dtype(),
@@ -755,7 +755,7 @@ def _load_persistable_vars(
             )
         # get suffix var name, see [why need to append suffix to persistable vars]
         new_name = inv_suffix_varname_dict[name]
-        # create output varbase
+        # create output var or param
         if extra_var_info[name].get('trainable', None) is not None:
             # use default shape and dtype
             new_var = framework.EagerParamBase(
diff --git a/test/dygraph_to_static/test_list.py b/test/dygraph_to_static/test_list.py
index 091d261ed74..577669279d6 100644
--- a/test/dygraph_to_static/test_list.py
+++ b/test/dygraph_to_static/test_list.py
@@ -228,7 +228,7 @@ class TestListWithoutControlFlow(unittest.TestCase):
             test_list_pop_without_control_flow_2,
         ]
 
-    def varbase_to_numpy(self, res):
+    def result_to_numpy(self, res):
         if isinstance(res, (list, tuple)):
             res = paddle.utils.map_structure(lambda x: x.numpy(), res)
         else:
@@ -248,7 +248,7 @@ class TestListWithoutControlFlow(unittest.TestCase):
             res = paddle.jit.to_static(self.dygraph_func)(self.input)
         else:
             res = self.dygraph_func(self.input)
-        return self.varbase_to_numpy(res)
+        return self.result_to_numpy(res)
 
     def test_transformed_static_result(self):
         for dyfunc in self.all_dygraph_funcs:
@@ -294,7 +294,7 @@ class TestListInWhileLoop(TestListWithoutControlFlow):
             )
         else:
             res = self.dygraph_func(self.input, self.iter_num)
-        return self.varbase_to_numpy(res)
+        return self.result_to_numpy(res)
 
 
 class TestListInWhileLoopWithStack(TestListInWhileLoop):
-- 
GitLab
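
The renamed DataParallel._find_tensor helper in the diff above is just a recursive flattener over nested containers of tensors; DataParallel passes the flattened list to the reducer via prepare_for_backward so it knows which output tensors take part in the backward pass. Below is a rough, self-contained sketch of that pattern, not part of the patch: the Tensor stand-in class and the find_tensors name are hypothetical placeholders for core.eager.Tensor and the private method, so the snippet runs without Paddle installed.

import itertools


class Tensor:
    # Stand-in for paddle's core.eager.Tensor, for illustration only.
    pass


def find_tensors(obj):
    # Return an iterable over every Tensor found in nested lists, tuples and
    # dicts, mirroring the traversal used by DataParallel._find_tensor.
    if isinstance(obj, Tensor):
        return [obj]
    if isinstance(obj, (list, tuple)):
        return itertools.chain(*map(find_tensors, obj))
    if isinstance(obj, dict):
        return itertools.chain(*map(find_tensors, obj.values()))
    return []


if __name__ == "__main__":
    outputs = {"logits": Tensor(), "aux": [Tensor(), (Tensor(), 3)]}
    print(len(list(find_tensors(outputs))))  # prints 3; non-tensor leaves are skipped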