Unverified commit 2b557da0, authored by Zeng Jinle, committed by GitHub

expose gc analysis interface (#34092)

Parent: 3316409c
@@ -254,7 +254,7 @@ cc_library(variable_helper SRCS variable_helper.cc DEPS lod_tensor)
 cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry denormal device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper)
-cc_library(executor_gc_helper SRCS executor_gc_helper.cc DEPS scope proto_desc operator garbage_collector)
+cc_library(executor_gc_helper SRCS executor_gc_helper.cc DEPS scope proto_desc operator garbage_collector op_registry while_op_helper recurrent_op_helper conditional_block_op_helper)
 if(WITH_DISTRIBUTE)
   if(WITH_PSLIB)
     cc_library(executor SRCS executor.cc multi_trainer.cc pipeline_trainer.cc dataset_factory.cc
...
@@ -20,8 +20,12 @@
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
 #include "paddle/fluid/framework/op_info.h"
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/var_desc.h"
+#include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h"
+#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h"
+#include "paddle/fluid/operators/controlflow/while_op_helper.h"
 #include "paddle/fluid/platform/enforce.h"

 namespace paddle {
@@ -185,5 +189,91 @@ void DeleteUnusedTensors(
   }
 }

+static std::vector<std::unique_ptr<OperatorBase>> CreateOpsFromBlock(
+    const BlockDesc &block) {
+  std::vector<std::unique_ptr<OperatorBase>> ops;
+  size_t op_num = block.OpSize();
+  ops.reserve(op_num);
+  for (size_t i = 0; i < op_num; ++i) {
+    auto *op_desc = block.Op(i);
+    ops.push_back(OpRegistry::CreateOp(*op_desc));
+  }
+  return ops;
+}
+
+std::vector<std::vector<std::vector<std::string>>> GetEagerDeletionCleanVars(
+    const ProgramDesc &origin_program,
+    const std::vector<std::string> &skip_vars) {
+  ProgramDesc program{origin_program};
+  size_t block_num = program.Size();
+  PADDLE_ENFORCE_GE(block_num, 1,
+                    platform::errors::PermissionDenied(
+                        "Program should have at least one block"));
+
+  // prepare safe GCs on sub block ops
+  auto global_block_ops = CreateOpsFromBlock(program.Block(0));
+  operators::PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOp(
+      program, 0, global_block_ops);
+  operators::PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp(program, 0,
+                                                             global_block_ops);
+  operators::PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
+      program, 0, global_block_ops);
+
+  // find the skip vars on each block
+  std::vector<std::vector<std::string>> skip_vars_on_each_block(block_num);
+  skip_vars_on_each_block[0] = skip_vars;
+  std::vector<bool> found_skip_vars(block_num, false);
+  found_skip_vars[0] = true;
+
+  const char *kSubBlock = "sub_block";
+  const char *kSkipEagerDeletionVars = "skip_eager_deletion_vars";
+
+  for (size_t i = 0; i < block_num; ++i) {
+    const auto &block = program.Block(i);
+    size_t op_num = block.OpSize();
+    for (size_t j = 0; j < op_num; ++j) {
+      auto *op = block.Op(j);
+      if (!op->HasAttr(kSubBlock) || !op->HasAttr(kSkipEagerDeletionVars)) {
+        continue;
+      }
+      auto sub_block_id = op->GetAttrIfExists<BlockDesc *>(kSubBlock)->ID();
+      PADDLE_ENFORCE_GE(sub_block_id, 0,
+                        platform::errors::PermissionDenied(
+                            "sub_block id must be non-negative number"));
+      PADDLE_ENFORCE_LT(sub_block_id, block_num,
+                        platform::errors::PermissionDenied(
+                            "sub_block id exceeds max block num"));
+      PADDLE_ENFORCE_EQ(
+          found_skip_vars[sub_block_id], false,
+          platform::errors::PermissionDenied(
+              "there are 2 ops which refer to the same sub_block %d",
+              sub_block_id));
+
+      found_skip_vars[sub_block_id] = true;
+      auto sub_block_skip_vars =
+          op->GetAttrIfExists<std::vector<std::string>>(kSkipEagerDeletionVars);
+      skip_vars_on_each_block[sub_block_id] = std::move(sub_block_skip_vars);
+    }
+  }
+
+  std::vector<std::vector<std::vector<std::string>>> result;
+  result.reserve(block_num);
+  for (size_t i = 0; i < block_num; ++i) {
+    const auto &block = program.Block(i);
+    const auto block_ops = CreateOpsFromBlock(block);
+    const auto &block_skip_vars = skip_vars_on_each_block[i];
+    auto delete_var_map = GetUnusedVars(block, block_ops, block_skip_vars);
+    std::vector<std::vector<std::string>> block_result;
+    block_result.reserve(block_ops.size());
+    for (const auto &op : block_ops) {
+      auto &delete_vars = delete_var_map[op.get()];
+      std::sort(delete_vars.begin(), delete_vars.end());  // for stable result
+      block_result.emplace_back(delete_vars);
+    }
+    result.emplace_back(std::move(block_result));
+  }
+  return result;
+}
+
 }  // namespace framework
 }  // namespace paddle
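The second loop in GetEagerDeletionCleanVars above hands each control-flow op's skip_eager_deletion_vars attribute down to the sub-block it owns, so every block is analyzed with the right protected set. A toy, Paddle-free sketch of that propagation (hypothetical data structures, for intuition only):

# toy model: a program is a list of blocks; each block is a list of op dicts;
# a control-flow op may name a sub-block index and carry a skip list for it
def collect_skip_vars(blocks, top_level_skip_vars):
    skip = [None] * len(blocks)
    skip[0] = list(top_level_skip_vars)
    for block in blocks:
        for op in block:
            sub = op.get('sub_block')  # index of the owned sub-block, if any
            if sub is None:
                continue
            # mirrors the PADDLE_ENFORCE_EQ above: one owner per sub-block
            assert skip[sub] is None, 'two ops refer to the same sub_block'
            skip[sub] = list(op.get('skip_eager_deletion_vars', []))
    return skip

blocks = [
    [{'type': 'while', 'sub_block': 1, 'skip_eager_deletion_vars': ['i']}],
    [{'type': 'increment'}],
]
print(collect_skip_vars(blocks, ['loss']))  # -> [['loss'], ['i']]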
@@ -43,5 +43,11 @@ void DeleteUnusedTensors(
         &delete_vars_map,
     GarbageCollector *gc);

+// Get the variables GC would clean after each op runs. This function is
+// intended for static analysis of a program.
+// The result is in the format: result[block_idx][op_idx][delete_var_idx]
+std::vector<std::vector<std::vector<std::string>>> GetEagerDeletionCleanVars(
+    const ProgramDesc &program, const std::vector<std::string> &skip_vars = {});
+
 }  // namespace framework
 }  // namespace paddle
@@ -31,6 +31,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/custom_operator.h"
 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/framework/executor.h"
+#include "paddle/fluid/framework/executor_gc_helper.h"
 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/framework/feed_fetch_type.h"
 #include "paddle/fluid/framework/garbage_collector.h"

@@ -1849,6 +1850,8 @@ All parameter, weight, gradient are variables in Paddle.
                     py::return_value_policy::reference)
       .def("finalize", &TrainerBase::Finalize);

+  m.def("_get_eager_deletion_vars", &framework::GetEagerDeletionCleanVars);
+
   py::class_<framework::Executor>(m, "Executor")
       .def(py::init<const platform::Place &>())
       .def("close", &Executor::Close)
...
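Since the binding is registered on the core module, its presence can be checked directly; a trivial sanity check, assuming a build that includes this commit:

import paddle.fluid as fluid

# the m.def(...) above exposes the C++ analysis function on fluid.core
assert hasattr(fluid.core, '_get_eager_deletion_vars')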
@@ -263,6 +263,7 @@ if avx_supported():
         from .core_avx import _get_all_register_op_kernels
         from .core_avx import _is_program_version_supported
         from .core_avx import _set_eager_deletion_mode
+        from .core_avx import _get_eager_deletion_vars
         from .core_avx import _set_fuse_parameter_group_size
         from .core_avx import _set_fuse_parameter_memory_size
         from .core_avx import _is_dygraph_debug_enabled

@@ -311,6 +312,7 @@ if load_noavx:
         from .core_noavx import _get_all_register_op_kernels
         from .core_noavx import _is_program_version_supported
         from .core_noavx import _set_eager_deletion_mode
+        from .core_noavx import _get_eager_deletion_vars
         from .core_noavx import _set_fuse_parameter_group_size
         from .core_noavx import _set_fuse_parameter_memory_size
         from .core_noavx import _is_dygraph_debug_enabled
...
@@ -21,6 +21,10 @@ import paddle.fluid as fluid
 import six
 import unittest
 import multiprocessing
+from functools import reduce
+import paddle
+
+paddle.enable_static()

 fluid.core._set_eager_deletion_mode(0.0, 1.0, True)

@@ -114,6 +118,12 @@ class TestExecutor(unittest.TestCase):
         self.assertEqual(len(outline_p_vars), 0)
         self.assertEqual(len(outline_np_vars), 0)

+    def assert_gc_vars(self, program, skip_vars, non_persistable_vars):
+        gc_vars = fluid.core._get_eager_deletion_vars(program.desc, skip_vars)
+        self.assertEqual(len(gc_vars), program.num_blocks)
+        gc_vars = reduce(lambda x, y: x + y, gc_vars[0])
+        self.assertEqual(set(gc_vars), set(non_persistable_vars))
+
     def executor_main(self):
         image, label, loss = simple_fc_net()
         loss.persistable = False

@@ -122,6 +132,9 @@ class TestExecutor(unittest.TestCase):
         print('Non-persistable var number {}'.format(len(non_persistables)))
         print(non_persistables)

+        self.assert_gc_vars(fluid.default_main_program(), [loss.name],
+                            non_persistables)
+
         exe = fluid.Executor(self.place)
         exe.run(fluid.default_startup_program())

@@ -147,6 +160,8 @@ class TestExecutor(unittest.TestCase):
         loss.persistable = False
         persistables, non_persistables = get_persistables_and_non_persistables(
             fluid.default_main_program(), [loss.name])
+        self.assert_gc_vars(fluid.default_main_program(), [loss.name],
+                            non_persistables)

         exe = fluid.Executor(self.place)
         exe.run(fluid.default_startup_program())
...
@@ -26,6 +26,8 @@ from paddle.fluid import ParamAttr
 from paddle.fluid.framework import Program, grad_var_name
 from paddle.fluid.executor import Executor
 from paddle.fluid.backward import append_backward
+import paddle
+paddle.enable_static()

 np.random.seed(123)
 os.environ["CPU_NUM"] = "1"

@@ -163,6 +165,9 @@ class EagerDeletionRecurrentOpTest1(unittest.TestCase):
         return rnn()

     def forward(self):
+        gc_vars = core._get_eager_deletion_vars(self.main_program.desc,
+                                                [self.output.name])
+        self.assertEqual(len(gc_vars), self.main_program.num_blocks)
         self.feed_map = {
             x: create_tensor(getattr(self.py_rnn, x), self.place)
             for x in self.data_field

@@ -184,6 +189,10 @@ class EagerDeletionRecurrentOpTest1(unittest.TestCase):
             for x in self.data_field
         ]

+        gc_vars = core._get_eager_deletion_vars(
+            self.main_program.desc, [var.name for var in fetch_list])
+        self.assertEqual(len(gc_vars), self.main_program.num_blocks)
+
         exe = Executor(self.place)
         return exe.run(self.main_program,
                        feed=self.feed_map,
...
@@ -27,6 +27,8 @@ import paddle.fluid.compiler as compiler
 import numpy
 import multiprocessing
+import paddle
+paddle.enable_static()

 fluid.core._set_eager_deletion_mode(0.0, 1.0, True)

@@ -125,6 +127,10 @@ class TestEagerDeletionWhileOpBase(unittest.TestCase):
         optim = fluid.optimizer.Adam(learning_rate=1e-3)
         optim.minimize(loss)

+        gc_vars = core._get_eager_deletion_vars(
+            fluid.default_main_program().desc, [loss.name])
+        self.assertEqual(len(gc_vars), 5)
+
         exe = Executor(self.place)
         exe.run(fluid.default_startup_program())
...
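The expected count of 5 comes from the sub-blocks that the while ops and their gradient ops add to the program. A hedged sketch of how control flow multiplies the number of analyzed blocks, using fluid.layers.while_loop (an API choice assumed here, not taken from this diff):

import paddle
import paddle.fluid as fluid

paddle.enable_static()

i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
ten = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)

def cond(i):
    return fluid.layers.less_than(i, ten)

def body(i):
    return [fluid.layers.increment(i)]

out = fluid.layers.while_loop(cond, body, [i])

prog = fluid.default_main_program()
gc_vars = fluid.core._get_eager_deletion_vars(prog.desc, [])
# the while op owns a sub-block, so more than one block gets analyzed
assert prog.num_blocks > 1
assert len(gc_vars) == prog.num_blocks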