From 9a4eec98135a742203aa61bde01185592507dce0 Mon Sep 17 00:00:00 2001
From: xiongkun
Date: Mon, 13 Dec 2021 16:04:39 +0800
Subject: [PATCH] fix single card 8 unittests in new executor (#37957)

* fix single card 8 unittests in new executor

* fix

* fix
---
 .../interpretercore_garbage_collector.cc              |  8 ++++++--
 paddle/fluid/pybind/pybind.cc                          | 12 ++++++++++++
 .../slim/quantization/post_training_quantization.py    |  3 +++
 python/paddle/fluid/executor.py                        | 12 +++++++++++-
 .../test_imperative_transformer_sorted_gradient.py     |  4 +++-
 5 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/interpretercore_garbage_collector.cc b/paddle/fluid/framework/new_executor/interpretercore_garbage_collector.cc
index f17f64dbcae..40537815b48 100644
--- a/paddle/fluid/framework/new_executor/interpretercore_garbage_collector.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore_garbage_collector.cc
@@ -69,11 +69,15 @@ void InterpreterCoreGarbageCollector::Add(paddle::framework::Variable* var,
   } else if (var->IsType<
                  operators::reader::
                      OrderedMultiDeviceLoDTensorBlockingQueueHolder>()) {
-    // var->Clear(); // TODO(xiongkun03) can we clear directly? Why we must use
-    // Add interface?
+    // TODO(xiongkun03): the old executor does not support eager deletion for
+    // this type of variable, so we just leave it here.
+  } else if (var->IsType<LoDRankTable>()) {
+    // TODO(xiongkun03): the old executor does not support eager deletion for
+    // this type of variable, so we just leave it here.
   } else if (var->IsType<SelectedRows>()) {
     Add(var->GetMutable<SelectedRows>()->mutable_value()->MoveMemoryHolder(),
         event, ctx);
+    var->GetMutable<SelectedRows>()->mutable_rows()->clear();
   } else if (var->IsType<LoDTensorArray>()) {
     auto* tensor_arr = var->GetMutable<LoDTensorArray>();
     for (auto& t : *tensor_arr) {
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index c5277a42103..f998c30dd15 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -1437,6 +1437,18 @@ All parameter, weight, gradient are variables in Paddle.
            out (core.Variable|None): the found variable or None.
            )DOC",
            py::return_value_policy::reference)
+      .def("erase", &Scope::EraseVars, py::arg("names"),
+           R"DOC(
+           Erase the variables with the given names from the current
+           scope if they exist.
+
+           Args:
+               names (list[str]): the names of the variables to be erased.
+
+           Returns:
+               None
+           )DOC",
+           py::return_value_policy::reference)
       .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); },
            R"DOC(
            Create a new sub-scope of the current scope.
diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
index 1ddb9c8e5fa..e9173a86b89 100644
--- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
+++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
@@ -552,9 +552,12 @@ class PostTrainingQuantization(object):
         '''
         Reset activations to be not persistable.
         '''
+        to_erase = []
         for var in self._program.list_vars():
             if var.name in self._quantized_act_var_name:
                 var.persistable = False
+                to_erase.append(var.name)
+        self._scope.erase(to_erase)
 
     def _sampling(self):
         '''
diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index d10564e21ea..c50af065bc4 100644
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -401,7 +401,17 @@ def _is_enable_standalone_executor():
 
 
 def _get_strong_program_cache_key(program, feed, fetch_list):
-    return str(id(program)) + _get_program_cache_key(feed, fetch_list)
+    # NOTE(xiongkun): id(program) may be reused, so also add the block's variable names to the cache key.
+    def _get_varname_from_block(block):
+        block_str = []
+        for var_name in list(block.vars.keys()):
+            block_str.append(var_name)
+        return "\n".join(block_str)
+
+    inner_program = program._program if isinstance(
+        program, compiler.CompiledProgram) else program
+    return _get_varname_from_block(inner_program.blocks[0]) + str(id(
+        program)) + _get_program_cache_key(feed, fetch_list)
 
 
 def _get_program_cache_key(feed, fetch_list):
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
index 57da838c554..3f129cae44a 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
@@ -316,7 +316,7 @@ sync = False
 # how many batches we use
 batch_num = 5
 
-np.random.seed = 90
+np.random.seed(90)
 src_word_np = np.arange(1, TrainTaskConfig.batch_size * seq_len + 1).reshape(
     [TrainTaskConfig.batch_size, seq_len]).astype('int64')
 src_pos_np = np.random.randint(
@@ -951,6 +951,8 @@ class TestDygraphTransformerSortGradient(unittest.TestCase):
 
         with guard():
             fluid.set_flags({'FLAGS_sort_sum_gradient': True})
+            # NOTE(xiongkun03): the new executor enables the inplace strategy by default, which makes the sum op's results differ slightly, so disable inplace here.
+            fluid.set_flags({'FLAGS_new_executor_use_inplace': False})
             paddle.seed(seed)
             paddle.framework.random._manual_program_seed(seed)
             transformer = TransFormer(
-- 
GitLab