提交 78fb3a62 编写于 作者: S sneaxiy

fix env variable setting bug

test=develop
上级 a7d0ac50
...@@ -63,7 +63,7 @@ cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto ...@@ -63,7 +63,7 @@ cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memory gflags) cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memory gflags glog)
cc_library(reader SRCS reader.cc DEPS lod_tensor ddim) cc_library(reader SRCS reader.cc DEPS lod_tensor ddim)
cc_test(reader_test SRCS reader_test.cc DEPS reader) cc_test(reader_test SRCS reader_test.cc DEPS reader)
......
...@@ -22,14 +22,9 @@ ...@@ -22,14 +22,9 @@
#include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/eager_deletion_op_handle.h" #include "paddle/fluid/framework/details/eager_deletion_op_handle.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/graph_helper.h"
DEFINE_double(memory_fraction_of_eager_deletion, 1.0,
"Fraction of eager deletion. If less than 1.0, all variables in "
"the program would be sorted according to its memory size, and "
"only the FLAGS_memory_fraction_of_eager_deletion of the largest "
"variables would be deleted.");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace details { namespace details {
...@@ -206,8 +201,9 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl( ...@@ -206,8 +201,9 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl(
} }
} }
op_vars_map = ShrinkGCVars(op_vars_map, vars, places, double memory_fraction = framework::GetEagerDeletionMemoryFraction();
FLAGS_memory_fraction_of_eager_deletion);
op_vars_map = ShrinkGCVars(op_vars_map, vars, places, memory_fraction);
for (auto &pair : op_vars_map) { for (auto &pair : op_vars_map) {
auto *op = pair.first; auto *op = pair.first;
...@@ -239,8 +235,7 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl( ...@@ -239,8 +235,7 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl(
eager_deletion_op->AddOutput(dummy_leaf); eager_deletion_op->AddOutput(dummy_leaf);
} }
VLOG(10) << "FLAGS_memory_fraction_of_eager_deletion = " VLOG(10) << "FLAGS_memory_fraction_of_eager_deletion = " << memory_fraction;
<< FLAGS_memory_fraction_of_eager_deletion;
VLOG(10) << "Create " << op_vars_map.size() << " EagerDeletionOpHandle(s)"; VLOG(10) << "Create " << op_vars_map.size() << " EagerDeletionOpHandle(s)";
auto while_op_eager_deletion_pass = auto while_op_eager_deletion_pass =
......
...@@ -93,7 +93,6 @@ std::unordered_map<OperatorBase *, std::vector<std::string>> GetUnusedVars( ...@@ -93,7 +93,6 @@ std::unordered_map<OperatorBase *, std::vector<std::string>> GetUnusedVars(
const BlockDesc &block, const BlockDesc &block,
const std::vector<std::unique_ptr<OperatorBase>> &ops, const std::vector<std::unique_ptr<OperatorBase>> &ops,
const std::vector<std::string> &skip_var_list) { const std::vector<std::string> &skip_var_list) {
UseGarbageCollectorGFlags();
std::unordered_set<std::string> skip_vars(skip_var_list.begin(), std::unordered_set<std::string> skip_vars(skip_var_list.begin(),
skip_var_list.end()); skip_var_list.end());
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/cuda_device_guard.h"
#endif #endif
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "glog/logging.h"
#include "paddle/fluid/framework/garbage_collector.h" #include "paddle/fluid/framework/garbage_collector.h"
namespace paddle { namespace paddle {
...@@ -36,6 +37,12 @@ DEFINE_bool(fast_eager_deletion_mode, true, ...@@ -36,6 +37,12 @@ DEFINE_bool(fast_eager_deletion_mode, true,
"Fast eager deletion mode. If enabled, memory would release " "Fast eager deletion mode. If enabled, memory would release "
"immediately without waiting GPU kernel ends."); "immediately without waiting GPU kernel ends.");
// Defines the gflags variable FLAGS_memory_fraction_of_eager_deletion
// (double, default 1.0). It is written by SetEagerDeletionMode() and read
// back through GetEagerDeletionMemoryFraction().
DEFINE_double(memory_fraction_of_eager_deletion, 1.0,
"Fraction of eager deletion. If less than 1.0, all variables in "
"the program would be sorted according to its memory size, and "
"only the FLAGS_memory_fraction_of_eager_deletion of the largest "
"variables would be deleted.");
GarbageCollector::GarbageCollector(const platform::Place &place, GarbageCollector::GarbageCollector(const platform::Place &place,
size_t max_memory_size) size_t max_memory_size)
: max_memory_size_((std::max)(max_memory_size, static_cast<size_t>(1))) { : max_memory_size_((std::max)(max_memory_size, static_cast<size_t>(1))) {
...@@ -101,8 +108,6 @@ void StreamGarbageCollector::ClearCallback( ...@@ -101,8 +108,6 @@ void StreamGarbageCollector::ClearCallback(
} }
#endif #endif
void UseGarbageCollectorGFlags() {}
int64_t GetEagerDeletionThreshold() { int64_t GetEagerDeletionThreshold() {
return FLAGS_eager_delete_tensor_gb < 0 return FLAGS_eager_delete_tensor_gb < 0
? -1 ? -1
...@@ -111,5 +116,16 @@ int64_t GetEagerDeletionThreshold() { ...@@ -111,5 +116,16 @@ int64_t GetEagerDeletionThreshold() {
} }
bool IsFastEagerDeletionModeEnabled() { return FLAGS_fast_eager_deletion_mode; } bool IsFastEagerDeletionModeEnabled() { return FLAGS_fast_eager_deletion_mode; }
// Overwrites the three eager-deletion gflags in a single call:
//   threshold -> FLAGS_eager_delete_tensor_gb
//   fraction  -> FLAGS_memory_fraction_of_eager_deletion
//   fast_mode -> FLAGS_fast_eager_deletion_mode
// The arguments are stored as given; no validation is performed here.
void SetEagerDeletionMode(double threshold, double fraction, bool fast_mode) {
  FLAGS_fast_eager_deletion_mode = fast_mode;
  FLAGS_memory_fraction_of_eager_deletion = fraction;
  FLAGS_eager_delete_tensor_gb = threshold;
}
// Returns the current value of FLAGS_memory_fraction_of_eager_deletion.
double GetEagerDeletionMemoryFraction() {
  const double fraction = FLAGS_memory_fraction_of_eager_deletion;
  return fraction;
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <memory> #include <memory>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include <utility> #include <utility>
#include "gflags/gflags.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
namespace paddle { namespace paddle {
...@@ -130,7 +131,9 @@ void GarbageCollector::Add(Container &&objs, Callback &&callback) { ...@@ -130,7 +131,9 @@ void GarbageCollector::Add(Container &&objs, Callback &&callback) {
int64_t GetEagerDeletionThreshold(); int64_t GetEagerDeletionThreshold();
bool IsFastEagerDeletionModeEnabled(); bool IsFastEagerDeletionModeEnabled();
extern void UseGarbageCollectorGFlags(); void SetEagerDeletionMode(double threshold, double fraction, bool fast_mode);
double GetEagerDeletionMemoryFraction();
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -567,6 +567,7 @@ std::unique_ptr<framework::OpDesc> BatchNormGradMaker::Apply() const { ...@@ -567,6 +567,7 @@ std::unique_ptr<framework::OpDesc> BatchNormGradMaker::Apply() const {
op->SetInput(framework::GradVarName("Y"), OutputGrad("Y")); op->SetInput(framework::GradVarName("Y"), OutputGrad("Y"));
op->SetInput("Scale", Input("Scale")); op->SetInput("Scale", Input("Scale"));
op->SetInput("Bias", Input("Bias"));
op->SetInput("SavedMean", Output("SavedMean")); op->SetInput("SavedMean", Output("SavedMean"));
op->SetInput("SavedVariance", Output("SavedVariance")); op->SetInput("SavedVariance", Output("SavedVariance"));
......
...@@ -152,13 +152,28 @@ class ConcatOpGrad : public framework::OperatorWithKernel { ...@@ -152,13 +152,28 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ConcatOpGradNoNeedBufferVarInference, DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ConcatOpGradNoNeedBufferVarInference,
"X"); "X");
// Grad-op desc maker for the `concat` operator. It produces a single
// `concat_grad` op desc and inherits its constructors from
// framework::SingleGradOpDescMaker.
class ConcatGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  // Builds the `concat_grad` desc:
  //   inputs  : "X" and the gradient variable of "Out"
  //   outputs : the gradient variable of "X"
  //   attrs   : copied unchanged from Attrs()
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> grad_desc(new framework::OpDesc());
    grad_desc->SetType("concat_grad");

    grad_desc->SetInput("X", Input("X"));

    const auto out_grad_name = framework::GradVarName("Out");
    grad_desc->SetInput(out_grad_name, OutputGrad("Out"));

    // NOTE(review): the `false` argument presumably disables empty-grad
    // placeholders, matching the "set false to disable empty grad" note on
    // the registration this maker replaces -- confirm against InputGrad().
    const auto x_grad_name = framework::GradVarName("X");
    grad_desc->SetOutput(x_grad_name, InputGrad("X", false));

    grad_desc->SetAttrMap(Attrs());
    return grad_desc;
  }
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(concat, ops::ConcatOp, ops::ConcatOpMaker, REGISTER_OPERATOR(concat, ops::ConcatOp, ops::ConcatOpMaker,
paddle::framework::DefaultGradOpDescMaker< ops::ConcatGradOpDescMaker);
false> /* set false to disable empty grad */);
REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad, REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad,
ops::ConcatOpGradNoNeedBufferVarInference); ops::ConcatOpGradNoNeedBufferVarInference);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
......
...@@ -76,10 +76,6 @@ DEFINE_bool(reader_queue_speed_test_mode, false, ...@@ -76,10 +76,6 @@ DEFINE_bool(reader_queue_speed_test_mode, false,
"If set true, the queue.pop will only get data from queue but not " "If set true, the queue.pop will only get data from queue but not "
"remove the data from queue for speed testing"); "remove the data from queue for speed testing");
DECLARE_double(eager_delete_tensor_gb);
DECLARE_bool(fast_eager_deletion_mode);
DECLARE_double(memory_fraction_of_eager_deletion);
// disable auto conversion to list in Python // disable auto conversion to list in Python
PYBIND11_MAKE_OPAQUE(paddle::framework::LoDTensorArray); PYBIND11_MAKE_OPAQUE(paddle::framework::LoDTensorArray);
...@@ -144,7 +140,6 @@ PYBIND11_MODULE(core, m) { ...@@ -144,7 +140,6 @@ PYBIND11_MODULE(core, m) {
paddle::platform::CpuTotalPhysicalMemory(); paddle::platform::CpuTotalPhysicalMemory();
paddle::memory::allocation::UseAllocatorStrategyGFlag(); paddle::memory::allocation::UseAllocatorStrategyGFlag();
paddle::framework::UseGarbageCollectorGFlags();
m.doc() = "C++ core of PaddlePaddle"; m.doc() = "C++ core of PaddlePaddle";
...@@ -163,12 +158,7 @@ PYBIND11_MODULE(core, m) { ...@@ -163,12 +158,7 @@ PYBIND11_MODULE(core, m) {
// NOTE(zjl): ctest would load environment variables at the beginning even // NOTE(zjl): ctest would load environment variables at the beginning even
// though we have not `import paddle.fluid as fluid`. So we add this API // though we have not `import paddle.fluid as fluid`. So we add this API
// to enable eager deletion mode in unittest. // to enable eager deletion mode in unittest.
m.def("_set_eager_deletion_mode", m.def("_set_eager_deletion_mode", &paddle::framework::SetEagerDeletionMode);
[](double threshold, double fraction, bool fast_mode) {
FLAGS_eager_delete_tensor_gb = threshold;
FLAGS_memory_fraction_of_eager_deletion = fraction;
FLAGS_fast_eager_deletion_mode = fast_mode;
});
m.add_object("_cleanup", m.add_object("_cleanup",
py::capsule([]() { ScopePool::Instance().Clear(); })); py::capsule([]() { ScopePool::Instance().Clear(); }));
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
import os import os
import unittest import unittest
import paddle.fluid as fluid
fluid.core._set_eager_deletion_mode(0.0, 1.0, True) fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
import os import os
import unittest import unittest
import paddle.fluid as fluid
fluid.core._set_eager_deletion_mode(0.0, 1.0, True) fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册