diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 652ef95c8d9456faf06285cd01da6a70eb8a67a7..4f02099af8b7371a05752c7993fa8deb4a8366a3 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -254,7 +254,7 @@ cc_library(variable_helper SRCS variable_helper.cc DEPS lod_tensor)
 cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry denormal device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper)
 
-cc_library(executor_gc_helper SRCS executor_gc_helper.cc DEPS scope proto_desc operator garbage_collector)
+cc_library(executor_gc_helper SRCS executor_gc_helper.cc DEPS scope proto_desc operator garbage_collector op_registry while_op_helper recurrent_op_helper conditional_block_op_helper)
 
 if(WITH_DISTRIBUTE)
   if(WITH_PSLIB)
     cc_library(executor SRCS executor.cc multi_trainer.cc pipeline_trainer.cc dataset_factory.cc
diff --git a/paddle/fluid/framework/executor_gc_helper.cc b/paddle/fluid/framework/executor_gc_helper.cc
index c06a3d4a183799c7c8ca130f9ff48e7bff23a3bd..4b7c8c6e3f49bca036a0bf1f367071b273381f01 100644
--- a/paddle/fluid/framework/executor_gc_helper.cc
+++ b/paddle/fluid/framework/executor_gc_helper.cc
@@ -20,8 +20,12 @@
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
 #include "paddle/fluid/framework/op_info.h"
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/var_desc.h"
+#include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h"
+#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h"
+#include "paddle/fluid/operators/controlflow/while_op_helper.h"
 #include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {
@@ -185,5 +189,91 @@ void DeleteUnusedTensors(
   }
 }
 
+static std::vector<std::unique_ptr<OperatorBase>> CreateOpsFromBlock(
+    const BlockDesc &block) {
+  std::vector<std::unique_ptr<OperatorBase>> ops;
+  size_t op_num = block.OpSize();
+  ops.reserve(op_num);
+  for (size_t i = 0; i < op_num; ++i) {
+    auto *op_desc = block.Op(i);
+    ops.push_back(OpRegistry::CreateOp(*op_desc));
+  }
+  return ops;
+}
+
+std::vector<std::vector<std::vector<std::string>>> GetEagerDeletionCleanVars(
+    const ProgramDesc &origin_program,
+    const std::vector<std::string> &skip_vars) {
+  ProgramDesc program{origin_program};
+  size_t block_num = program.Size();
+  PADDLE_ENFORCE_GE(block_num, 1,
+                    platform::errors::PermissionDenied(
+                        "Program should have at least one block"));
+
+  // prepare safe GCs on sub block ops
+  auto global_block_ops = CreateOpsFromBlock(program.Block(0));
+  operators::PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOp(
+      program, 0, global_block_ops);
+  operators::PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp(program, 0,
+                                                             global_block_ops);
+  operators::PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
+      program, 0, global_block_ops);
+
+  // find the skip vars on each block
+  std::vector<std::vector<std::string>> skip_vars_on_each_block(block_num);
+  skip_vars_on_each_block[0] = skip_vars;
+  std::vector<bool> found_skip_vars(block_num, false);
+  found_skip_vars[0] = true;
+
+  const char *kSubBlock = "sub_block";
+  const char *kSkipEagerDeletionVars = "skip_eager_deletion_vars";
+
+  for (size_t i = 0; i < block_num; ++i) {
+    const auto &block = program.Block(i);
+    size_t op_num = block.OpSize();
+    for (size_t j = 0; j < op_num; ++j) {
+      auto *op = block.Op(j);
+      if (!op->HasAttr(kSubBlock) || !op->HasAttr(kSkipEagerDeletionVars)) {
+        continue;
+      }
+      auto sub_block_id =
+          op->GetAttrIfExists<BlockDesc *>(kSubBlock)->ID();
+      PADDLE_ENFORCE_GE(sub_block_id, 0,
+                        platform::errors::PermissionDenied(
+                            "sub_block id must be non-negative number"));
+      PADDLE_ENFORCE_LT(sub_block_id, block_num,
+                        platform::errors::PermissionDenied(
+                            "sub_block id exceeds max block num"));
+      PADDLE_ENFORCE_EQ(
+          found_skip_vars[sub_block_id], false,
+          platform::errors::PermissionDenied(
+              "there are 2 ops which refer to the same sub_block %d",
+              sub_block_id));
+
+      found_skip_vars[sub_block_id] = true;
+      auto sub_block_skip_vars =
+          op->GetAttrIfExists<std::vector<std::string>>(kSkipEagerDeletionVars);
+      skip_vars_on_each_block[sub_block_id] = std::move(sub_block_skip_vars);
+    }
+  }
+
+  std::vector<std::vector<std::vector<std::string>>> result;
+  result.reserve(block_num);
+  for (size_t i = 0; i < block_num; ++i) {
+    const auto &block = program.Block(i);
+    const auto block_ops = CreateOpsFromBlock(block);
+    const auto &block_skip_vars = skip_vars_on_each_block[i];
+    auto delete_var_map = GetUnusedVars(block, block_ops, block_skip_vars);
+    std::vector<std::vector<std::string>> block_result;
+    block_result.reserve(block_ops.size());
+    for (const auto &op : block_ops) {
+      auto &delete_vars = delete_var_map[op.get()];
+      std::sort(delete_vars.begin(), delete_vars.end());  // for stable result
+      block_result.emplace_back(delete_vars);
+    }
+    result.emplace_back(std::move(block_result));
+  }
+  return result;
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/executor_gc_helper.h b/paddle/fluid/framework/executor_gc_helper.h
index e44edc5aa1c810f859942a62763e0c9179885987..886341791bade8697773bac69722f6827d5e33d8 100644
--- a/paddle/fluid/framework/executor_gc_helper.h
+++ b/paddle/fluid/framework/executor_gc_helper.h
@@ -43,5 +43,11 @@ void DeleteUnusedTensors(
         &delete_vars_map,
     GarbageCollector *gc);
 
+// Get the variables that the GC cleans after each op runs. This function is
+// intended for static analysis of a program.
+// The result is in the format: result[block_idx][op_idx][delete_var_idx]
+std::vector<std::vector<std::vector<std::string>>> GetEagerDeletionCleanVars(
+    const ProgramDesc &program, const std::vector<std::string> &skip_vars = {});
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 5508c516fbbbc1e6276097775e0f92c60367b2a7..4a43e51e7cabcfe76418f7187f755bb0bce5455d 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -31,6 +31,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/custom_operator.h"
 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/framework/executor.h"
+#include "paddle/fluid/framework/executor_gc_helper.h"
 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/framework/feed_fetch_type.h"
 #include "paddle/fluid/framework/garbage_collector.h"
@@ -1849,6 +1850,8 @@ All parameter, weight, gradient are variables in Paddle.
                         py::return_value_policy::reference)
       .def("finalize", &TrainerBase::Finalize);
 
+  m.def("_get_eager_deletion_vars", &framework::GetEagerDeletionCleanVars);
+
   py::class_<Executor>(m, "Executor")
       .def(py::init<const platform::Place &>())
       .def("close", &Executor::Close)
diff --git a/python/paddle/fluid/core.py b/python/paddle/fluid/core.py
index c42580676af598dc37b77edc7f5fb1fc7456d076..ae1a944f7a29a733f3e15127f3457f507da2d569 100644
--- a/python/paddle/fluid/core.py
+++ b/python/paddle/fluid/core.py
@@ -263,6 +263,7 @@ if avx_supported():
         from .core_avx import _get_all_register_op_kernels
         from .core_avx import _is_program_version_supported
         from .core_avx import _set_eager_deletion_mode
+        from .core_avx import _get_eager_deletion_vars
         from .core_avx import _set_fuse_parameter_group_size
         from .core_avx import _set_fuse_parameter_memory_size
         from .core_avx import _is_dygraph_debug_enabled
@@ -311,6 +312,7 @@ if load_noavx:
         from .core_noavx import _get_all_register_op_kernels
         from .core_noavx import _is_program_version_supported
         from .core_noavx import _set_eager_deletion_mode
+        from .core_noavx import _get_eager_deletion_vars
         from .core_noavx import _set_fuse_parameter_group_size
         from .core_noavx import _set_fuse_parameter_memory_size
         from .core_noavx import _is_dygraph_debug_enabled
diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
index 1590d866b1c73c88ed2c3fbd1fceb90831954b13..de85c76351448ad35ce5dd65a0e10ead207d25cc 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
@@ -21,6 +21,10 @@ import paddle.fluid as fluid
 import six
 import unittest
 import multiprocessing
+from functools import reduce
+
+import paddle
+paddle.enable_static()
 
 fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
@@ -114,6 +118,12 @@ class TestExecutor(unittest.TestCase):
         self.assertEqual(len(outline_p_vars), 0)
         self.assertEqual(len(outline_np_vars), 0)
 
+    def assert_gc_vars(self, program, skip_vars, non_persistable_vars):
+        gc_vars = fluid.core._get_eager_deletion_vars(program.desc, skip_vars)
+        self.assertEqual(len(gc_vars), program.num_blocks)
+        gc_vars = reduce(lambda x, y: x + y, gc_vars[0])
+        self.assertEqual(set(gc_vars), set(non_persistable_vars))
+
     def executor_main(self):
         image, label, loss = simple_fc_net()
         loss.persistable = False
@@ -122,6 +132,9 @@ class TestExecutor(unittest.TestCase):
         print('Non-persistable var number {}'.format(len(non_persistables)))
         print(non_persistables)
 
+        self.assert_gc_vars(fluid.default_main_program(), [loss.name],
+                            non_persistables)
+
         exe = fluid.Executor(self.place)
         exe.run(fluid.default_startup_program())
@@ -147,6 +160,8 @@ class TestExecutor(unittest.TestCase):
         loss.persistable = False
         persistables, non_persistables = get_persistables_and_non_persistables(
             fluid.default_main_program(), [loss.name])
+        self.assert_gc_vars(fluid.default_main_program(), [loss.name],
+                            non_persistables)
 
         exe = fluid.Executor(self.place)
         exe.run(fluid.default_startup_program())
diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py
index ef4cbf0b742e15291781688a9bfa2d19fd2bae73..01d8cbc5b7dd1dceca1f96b3207393990e9a4d4b 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py
@@ -26,6 +26,8 @@ from paddle.fluid import ParamAttr
 from paddle.fluid.framework import Program, grad_var_name
 from paddle.fluid.executor import Executor
 from paddle.fluid.backward import append_backward
+import paddle
+paddle.enable_static()
 
 np.random.seed(123)
 os.environ["CPU_NUM"] = "1"
@@ -163,6 +165,9 @@ class EagerDeletionRecurrentOpTest1(unittest.TestCase):
         return rnn()
 
     def forward(self):
+        gc_vars = core._get_eager_deletion_vars(self.main_program.desc,
+                                                [self.output.name])
+        self.assertEqual(len(gc_vars), self.main_program.num_blocks)
         self.feed_map = {
             x: create_tensor(getattr(self.py_rnn, x), self.place)
             for x in self.data_field
@@ -184,6 +189,10 @@ class EagerDeletionRecurrentOpTest1(unittest.TestCase):
             for x in self.data_field
         ]
 
+        gc_vars = core._get_eager_deletion_vars(
+            self.main_program.desc, [var.name for var in fetch_list])
+        self.assertEqual(len(gc_vars), self.main_program.num_blocks)
+
         exe = Executor(self.place)
         return exe.run(self.main_program,
                        feed=self.feed_map,
diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py
index 45f385968cf41cd52ae625ed8008602982ae4d42..936651d8324fce55724e4f512ded9da0a8dc8ef1 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py
@@ -27,6 +27,8 @@ import paddle.fluid.compiler as compiler
 import numpy
 import multiprocessing
 
+import paddle
+paddle.enable_static()
 fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
@@ -125,6 +127,10 @@ class TestEagerDeletionWhileOpBase(unittest.TestCase):
         optim = fluid.optimizer.Adam(learning_rate=1e-3)
         optim.minimize(loss)
 
+        gc_vars = core._get_eager_deletion_vars(
+            fluid.default_main_program().desc, [loss.name])
+        self.assertEqual(len(gc_vars), 5)
+
         exe = Executor(self.place)
         exe.run(fluid.default_startup_program())
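
Usage note for reviewers: the sketch below shows how the new `_get_eager_deletion_vars` binding can be driven from Python, mirroring the calls exercised in the tests above. The binding name, its `(program_desc, skip_vars)` arguments, and the `result[block_idx][op_idx][delete_var_idx]` layout come from this patch; the toy network (`image`, `hidden`, `loss`) is illustrative only and not part of the change.

    import paddle
    import paddle.fluid as fluid

    paddle.enable_static()

    # Build a trivial static program; any ProgramDesc works here.
    image = fluid.data(name='image', shape=[None, 784], dtype='float32')
    hidden = fluid.layers.fc(input=image, size=100, act='relu')
    loss = fluid.layers.reduce_mean(hidden)

    # result[block_idx][op_idx] lists the variables that the eager-deletion
    # GC may free right after that op runs; names passed in skip_vars
    # (here the fetched loss) are never scheduled for deletion.
    gc_vars = fluid.core._get_eager_deletion_vars(
        fluid.default_main_program().desc, [loss.name])
    for block_idx, block_gc_vars in enumerate(gc_vars):
        for op_idx, op_gc_vars in enumerate(block_gc_vars):
            print(block_idx, op_idx, op_gc_vars)

Because the C++ side sorts each per-op variable list before returning (see the `std::sort` above), the output is deterministic across runs, which is what lets the tests compare it against expected sets.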