diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 966f3d77cda4e092babe445ca4ed1d582d05e3fb..aed7e372b6614511dff4977ea3c37c363252b0d7 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -63,7 +63,7 @@ cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory) nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) -cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memory gflags) +cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memory gflags glog) cc_library(reader SRCS reader.cc DEPS lod_tensor ddim) cc_test(reader_test SRCS reader_test.cc DEPS reader) diff --git a/paddle/fluid/framework/details/eager_deletion_pass.cc b/paddle/fluid/framework/details/eager_deletion_pass.cc index 377bb915e0ce175d4e3fb74cb1ace21e5f46d9d8..a6baa26134cf36ea93dde554f808e73fa0c30b93 100644 --- a/paddle/fluid/framework/details/eager_deletion_pass.cc +++ b/paddle/fluid/framework/details/eager_deletion_pass.cc @@ -22,14 +22,9 @@ #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/eager_deletion_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" +#include "paddle/fluid/framework/garbage_collector.h" #include "paddle/fluid/framework/ir/graph_helper.h" -DEFINE_double(memory_fraction_of_eager_deletion, 1.0, - "Fraction of eager deletion. If less than 1.0, all variables in " - "the program would be sorted according to its memory size, and " - "only the FLAGS_memory_fraction_of_eager_deletion of the largest " - "variables would be deleted."); - namespace paddle { namespace framework { namespace details { @@ -206,8 +201,9 @@ std::unique_ptr EagerDeletionPass::ApplyImpl( } } - op_vars_map = ShrinkGCVars(op_vars_map, vars, places, - FLAGS_memory_fraction_of_eager_deletion); + double memory_fraction = framework::GetEagerDeletionMemoryFraction(); + + op_vars_map = ShrinkGCVars(op_vars_map, vars, places, memory_fraction); for (auto &pair : op_vars_map) { auto *op = pair.first; @@ -239,8 +235,7 @@ std::unique_ptr EagerDeletionPass::ApplyImpl( eager_deletion_op->AddOutput(dummy_leaf); } - VLOG(10) << "FLAGS_memory_fraction_of_eager_deletion = " - << FLAGS_memory_fraction_of_eager_deletion; + VLOG(10) << "FLAGS_memory_fraction_of_eager_deletion = " << memory_fraction; VLOG(10) << "Create " << op_vars_map.size() << " EagerDeletionOpHandle(s)"; auto while_op_eager_deletion_pass = diff --git a/paddle/fluid/framework/executor_gc_helper.cc b/paddle/fluid/framework/executor_gc_helper.cc index 804c0215afd317edf326b1693034c0823739b70f..77b0977b5a47fdf4413e75c4e89cf638949e937f 100644 --- a/paddle/fluid/framework/executor_gc_helper.cc +++ b/paddle/fluid/framework/executor_gc_helper.cc @@ -93,7 +93,6 @@ std::unordered_map> GetUnusedVars( const BlockDesc &block, const std::vector> &ops, const std::vector &skip_var_list) { - UseGarbageCollectorGFlags(); std::unordered_set skip_vars(skip_var_list.begin(), skip_var_list.end()); diff --git a/paddle/fluid/framework/garbage_collector.cc b/paddle/fluid/framework/garbage_collector.cc index 3b13b09d0a672d7bc4af14289f1b1eef9c6ba544..789b2ef80ec09a69ca227a27c61dd58e58a2fc04 100644 --- a/paddle/fluid/framework/garbage_collector.cc +++ b/paddle/fluid/framework/garbage_collector.cc @@ -22,6 +22,7 @@ #include "paddle/fluid/platform/cuda_device_guard.h" #endif #include "gflags/gflags.h" +#include "glog/logging.h" #include "paddle/fluid/framework/garbage_collector.h" namespace paddle { @@ -36,6 +37,12 @@ DEFINE_bool(fast_eager_deletion_mode, true, "Fast eager deletion mode. If enabled, memory would release " "immediately without waiting GPU kernel ends."); +DEFINE_double(memory_fraction_of_eager_deletion, 1.0, + "Fraction of eager deletion. If less than 1.0, all variables in " + "the program would be sorted according to its memory size, and " + "only the FLAGS_memory_fraction_of_eager_deletion of the largest " + "variables would be deleted."); + GarbageCollector::GarbageCollector(const platform::Place &place, size_t max_memory_size) : max_memory_size_((std::max)(max_memory_size, static_cast(1))) { @@ -101,8 +108,6 @@ void StreamGarbageCollector::ClearCallback( } #endif -void UseGarbageCollectorGFlags() {} - int64_t GetEagerDeletionThreshold() { return FLAGS_eager_delete_tensor_gb < 0 ? -1 @@ -111,5 +116,16 @@ int64_t GetEagerDeletionThreshold() { } bool IsFastEagerDeletionModeEnabled() { return FLAGS_fast_eager_deletion_mode; } + +void SetEagerDeletionMode(double threshold, double fraction, bool fast_mode) { + FLAGS_eager_delete_tensor_gb = threshold; + FLAGS_memory_fraction_of_eager_deletion = fraction; + FLAGS_fast_eager_deletion_mode = fast_mode; +} + +double GetEagerDeletionMemoryFraction() { + return FLAGS_memory_fraction_of_eager_deletion; +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/garbage_collector.h b/paddle/fluid/framework/garbage_collector.h index 922ca23195dd67019cee8132a8b8f2b362872eb1..f0b504627ae0cd99c8b4b15df3dcfc39a56507f2 100644 --- a/paddle/fluid/framework/garbage_collector.h +++ b/paddle/fluid/framework/garbage_collector.h @@ -19,6 +19,7 @@ #include #include // NOLINT #include +#include "gflags/gflags.h" #include "paddle/fluid/platform/device_context.h" namespace paddle { @@ -130,7 +131,9 @@ void GarbageCollector::Add(Container &&objs, Callback &&callback) { int64_t GetEagerDeletionThreshold(); bool IsFastEagerDeletionModeEnabled(); -extern void UseGarbageCollectorGFlags(); +void SetEagerDeletionMode(double threshold, double fraction, bool fast_mode); + +double GetEagerDeletionMemoryFraction(); } // namespace framework } // namespace paddle diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 48bab618f1eb88008ac1c4c04957acde30817410..c0ad959309a7036639c4bc15621a2bd0296526f5 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -567,6 +567,7 @@ std::unique_ptr BatchNormGradMaker::Apply() const { op->SetInput(framework::GradVarName("Y"), OutputGrad("Y")); op->SetInput("Scale", Input("Scale")); + op->SetInput("Bias", Input("Bias")); op->SetInput("SavedMean", Output("SavedMean")); op->SetInput("SavedVariance", Output("SavedVariance")); diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc index 5608650b3c49065c3fb71a7fbd0127c640ac917e..1f71555180361a1522b7a1c8383fe128bc4edcd0 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -152,13 +152,28 @@ class ConcatOpGrad : public framework::OperatorWithKernel { DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ConcatOpGradNoNeedBufferVarInference, "X"); +class ConcatGradOpDescMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + std::unique_ptr op(new framework::OpDesc()); + op->SetType("concat_grad"); + op->SetInput("X", Input("X")); + op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); + op->SetOutput(framework::GradVarName("X"), InputGrad("X", false)); + op->SetAttrMap(Attrs()); + return op; + } +}; + } // namespace operators } // namespace paddle namespace ops = paddle::operators; REGISTER_OPERATOR(concat, ops::ConcatOp, ops::ConcatOpMaker, - paddle::framework::DefaultGradOpDescMaker< - false> /* set false to disable empty grad */); + ops::ConcatGradOpDescMaker); REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad, ops::ConcatOpGradNoNeedBufferVarInference); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index d32f8535a2fd572bf11bef2d9e4d2e9a2fffca2f..abc490e2ad0829a1a400ddb638cd1e87578612c0 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -76,10 +76,6 @@ DEFINE_bool(reader_queue_speed_test_mode, false, "If set true, the queue.pop will only get data from queue but not " "remove the data from queue for speed testing"); -DECLARE_double(eager_delete_tensor_gb); -DECLARE_bool(fast_eager_deletion_mode); -DECLARE_double(memory_fraction_of_eager_deletion); - // disable auto conversion to list in Python PYBIND11_MAKE_OPAQUE(paddle::framework::LoDTensorArray); @@ -144,7 +140,6 @@ PYBIND11_MODULE(core, m) { paddle::platform::CpuTotalPhysicalMemory(); paddle::memory::allocation::UseAllocatorStrategyGFlag(); - paddle::framework::UseGarbageCollectorGFlags(); m.doc() = "C++ core of PaddlePaddle"; @@ -163,12 +158,7 @@ PYBIND11_MODULE(core, m) { // NOTE(zjl): ctest would load environment variables at the beginning even // though we have not `import paddle.fluid as fluid`. So we add this API // to enable eager deletion mode in unittest. - m.def("_set_eager_deletion_mode", - [](double threshold, double fraction, bool fast_mode) { - FLAGS_eager_delete_tensor_gb = threshold; - FLAGS_memory_fraction_of_eager_deletion = fraction; - FLAGS_fast_eager_deletion_mode = fast_mode; - }); + m.def("_set_eager_deletion_mode", &paddle::framework::SetEagerDeletionMode); m.add_object("_cleanup", py::capsule([]() { ScopePool::Instance().Clear(); })); diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py index a3f22692709f00bafd9c745cb264ca4180be2a86..ecdf9efa451743f8368079183fcb33f1769a6ab5 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py @@ -14,6 +14,7 @@ import os import unittest +import paddle.fluid as fluid fluid.core._set_eager_deletion_mode(0.0, 1.0, True) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py index 2a94a021c81d65b301f9e909e0c5df7175a970f5..44568ff66b61affdd5be809e23ba09597645d470 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py @@ -14,6 +14,7 @@ import os import unittest +import paddle.fluid as fluid fluid.core._set_eager_deletion_mode(0.0, 1.0, True)